# Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder

# In this project we predicting the salary prices by their different key points

# Accesing Dataset

In [3]:
df=pd.read_csv("ds_salaries.csv")
df.head()

Unnamed: 0,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size
0,2023,SE,FT,Principal Data Scientist,80000,EUR,85847,ES,100,ES,L
1,2023,MI,CT,ML Engineer,30000,USD,30000,US,100,US,S
2,2023,MI,CT,ML Engineer,25500,USD,25500,US,100,US,S
3,2023,SE,FT,Data Scientist,175000,USD,175000,CA,100,CA,M
4,2023,SE,FT,Data Scientist,120000,USD,120000,CA,100,CA,M


In [4]:
df.shape

(99, 11)

# Here we Drop unnecessary columns

In [5]:
df.drop(['salary_currency','employment_type','salary_in_usd','employee_residence','remote_ratio','company_size','company_location'],axis=1,inplace=True)

In [6]:
df

Unnamed: 0,work_year,experience_level,job_title,salary
0,2023,SE,Principal Data Scientist,80000
1,2023,MI,ML Engineer,30000
2,2023,MI,ML Engineer,25500
3,2023,SE,Data Scientist,175000
4,2023,SE,Data Scientist,120000
...,...,...,...,...
94,2023,SE,Data Scientist,70000
95,2023,EN,Machine Learning Engineer,163196
96,2023,EN,Machine Learning Engineer,145885
97,2023,SE,Data Engineer,217000


# let's check there is any null value ?

In [7]:
df.isnull().sum()

work_year           0
experience_level    0
job_title           0
salary              0
dtype: int64

In [8]:
df.describe()

Unnamed: 0,work_year,salary
count,99.0,99.0
mean,2022.979798,184896.7
std,0.141407,166759.3
min,2022.0,25500.0
25%,2023.0,121800.0
50%,2023.0,170000.0
75%,2023.0,217750.0
max,2023.0,1650000.0


In [9]:
df

Unnamed: 0,work_year,experience_level,job_title,salary
0,2023,SE,Principal Data Scientist,80000
1,2023,MI,ML Engineer,30000
2,2023,MI,ML Engineer,25500
3,2023,SE,Data Scientist,175000
4,2023,SE,Data Scientist,120000
...,...,...,...,...
94,2023,SE,Data Scientist,70000
95,2023,EN,Machine Learning Engineer,163196
96,2023,EN,Machine Learning Engineer,145885
97,2023,SE,Data Engineer,217000


# Data Cleaning

# using onehot encoding WE convert all categorical columns into a numerical

# here we coverting 'experience _level' column

In [10]:
df1=df[['experience_level']]
df1.head()

Unnamed: 0,experience_level
0,SE
1,MI
2,MI
3,SE
4,SE


In [11]:
oh_enc = OneHotEncoder(sparse=False,)
oh_enc_arr = oh_enc.fit_transform(df1[['experience_level']])

oh_enc_arr



array([[0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],


In [12]:
dummy_df=pd.get_dummies(df1[['experience_level']])
dummy_df.head(4)

Unnamed: 0,experience_level_EN,experience_level_EX,experience_level_MI,experience_level_SE
0,False,False,False,True
1,False,False,True,False
2,False,False,True,False
3,False,False,False,True


In [13]:
oh_enc = OneHotEncoder(sparse=False,drop='first')
oh_enc_arr = oh_enc.fit_transform(df1[['experience_level']])
oh_enc_arr



array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 1., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0

In [14]:
oh_enc_df = pd.DataFrame(oh_enc_arr, columns=['experience_level_EX','experience_level_MI','experience_level_SE'] ,)

oh_enc_df

Unnamed: 0,experience_level_EX,experience_level_MI,experience_level_SE
0,0.0,0.0,1.0
1,0.0,1.0,0.0
2,0.0,1.0,0.0
3,0.0,0.0,1.0
4,0.0,0.0,1.0
...,...,...,...
94,0.0,0.0,1.0
95,0.0,0.0,0.0
96,0.0,0.0,0.0
97,0.0,0.0,1.0


In [15]:
df1['experience_level_EX'] = oh_enc_df[['experience_level_SE']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['experience_level_EX'] = oh_enc_df[['experience_level_SE']].copy()


In [16]:
df1['experience_level_EX'] = oh_enc_df[['experience_level_MI']].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['experience_level_EX'] = oh_enc_df[['experience_level_MI']].copy()


In [17]:
df1['experience_level']=oh_enc_df['experience_level_EX'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['experience_level']=oh_enc_df['experience_level_EX'].copy()


In [18]:
df.update(df1)
df.head()

Unnamed: 0,work_year,experience_level,job_title,salary
0,2023,0.0,Principal Data Scientist,80000
1,2023,0.0,ML Engineer,30000
2,2023,0.0,ML Engineer,25500
3,2023,0.0,Data Scientist,175000
4,2023,0.0,Data Scientist,120000


# here we coverting 'job_title' column

In [19]:
df2=df[['job_title']]
df2.head()

Unnamed: 0,job_title
0,Principal Data Scientist
1,ML Engineer
2,ML Engineer
3,Data Scientist
4,Data Scientist


In [20]:
oh_enc = OneHotEncoder(sparse=False,)
oh_enc_arr = oh_enc.fit_transform(df2[['job_title']])

oh_enc_arr



array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [21]:
dummy_df=pd.get_dummies(df2[['job_title']])
dummy_df.head(4)

Unnamed: 0,job_title_AI Developer,job_title_Analytics Engineer,job_title_Applied Machine Learning Engineer,job_title_Applied Scientist,job_title_Business Intelligence Engineer,job_title_Compliance Data Analyst,job_title_Computer Vision Engineer,job_title_Data Analyst,job_title_Data Architect,job_title_Data Engineer,job_title_Data Modeler,job_title_Data Quality Analyst,job_title_Data Scientist,job_title_Data Strategist,job_title_ML Engineer,job_title_Machine Learning Engineer,job_title_Principal Data Scientist,job_title_Research Engineer
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False


In [22]:
oh_enc = OneHotEncoder(sparse=False,drop='first')
oh_enc_arr = oh_enc.fit_transform(df2[['job_title']])
oh_enc_arr



array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [23]:
oh_enc_df = pd.DataFrame(oh_enc_arr, columns=['job_title_AI Developer','job_title_Analytics Engineer','job_title_Applied Machine Learning Engineer','job_title_Applied Scientist','job_title_Business Intelligence Engineer','job_title_Compliance Data Analyst','job_title_Computer Vision Engineer','job_title_Data Analyst','job_title_Data Architect','job_title_Data Engineer','job_title_Data Modeler','job_title_Data Quality Analyst','job_title_Data Scientist','job_title_Data Strategist''job_title_ML Engineer','job_title_Machine Learning Engineer','job_title_Principal Data Scientist','job_title_Research Engineer'] ,)

oh_enc_df

Unnamed: 0,job_title_AI Developer,job_title_Analytics Engineer,job_title_Applied Machine Learning Engineer,job_title_Applied Scientist,job_title_Business Intelligence Engineer,job_title_Compliance Data Analyst,job_title_Computer Vision Engineer,job_title_Data Analyst,job_title_Data Architect,job_title_Data Engineer,job_title_Data Modeler,job_title_Data Quality Analyst,job_title_Data Scientist,job_title_Data Strategistjob_title_ML Engineer,job_title_Machine Learning Engineer,job_title_Principal Data Scientist,job_title_Research Engineer
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Research Engineer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Research Engineer'].copy()


In [25]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Principal Data Scientist'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Principal Data Scientist'].copy()


In [26]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Machine Learning Engineer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Machine Learning Engineer'].copy()


In [27]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Data Strategistjob_title_ML Engineer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Data Strategistjob_title_ML Engineer'].copy()


In [28]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Data Scientist'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Data Scientist'].copy()


In [29]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Data Quality Analyst'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Data Quality Analyst'].copy()


In [30]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Data Modeler'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Data Modeler'].copy()


In [31]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Data Engineer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Data Engineer'].copy()


In [32]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Data Architect'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Data Architect'].copy()


In [33]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Data Analyst'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Data Analyst'].copy()


In [34]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Computer Vision Engineer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Computer Vision Engineer'].copy()


In [35]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Compliance Data Analyst'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Compliance Data Analyst'].copy()


In [36]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Business Intelligence Engineer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Business Intelligence Engineer'].copy()


In [37]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Applied Scientist'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Applied Scientist'].copy()


In [38]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Applied Machine Learning Engineer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Applied Machine Learning Engineer'].copy()


In [39]:
df2['job_title_AI Developer']=oh_enc_df['job_title_Analytics Engineer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title_AI Developer']=oh_enc_df['job_title_Analytics Engineer'].copy()


In [40]:
df2['job_title']=oh_enc_df['job_title_AI Developer'].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['job_title']=oh_enc_df['job_title_AI Developer'].copy()


In [41]:
df.update(df2)
df.head()

Unnamed: 0,work_year,experience_level,job_title,salary
0,2023,0.0,0.0,80000
1,2023,0.0,0.0,30000
2,2023,0.0,0.0,25500
3,2023,0.0,0.0,175000
4,2023,0.0,0.0,120000


# We have total 2 categorical columns we covert all 2 columns into numerical

In [42]:
df.head()# checking changes in original df

Unnamed: 0,work_year,experience_level,job_title,salary
0,2023,0.0,0.0,80000
1,2023,0.0,0.0,30000
2,2023,0.0,0.0,25500
3,2023,0.0,0.0,175000
4,2023,0.0,0.0,120000


In [43]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   work_year         99 non-null     int64 
 1   experience_level  99 non-null     object
 2   job_title         99 non-null     object
 3   salary            99 non-null     int64 
dtypes: int64(2), object(2)
memory usage: 3.2+ KB


# So Our Final converted Dataset is ready to work

# Spilting Data

In [44]:
x=df.drop("salary",axis=1)
y=df["salary"]
print("Shape of x",x.shape)
print("Shape of y",y.shape)

Shape of x (99, 3)
Shape of y (99,)


In [45]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.15, random_state=10)
print('Shape of X_train = ', x_train.shape)
print('Shape of y_train = ', y_train.shape)
print('Shape of X_test = ', x_test.shape)
print('Shape of y_test = ', y_test.shape)

Shape of X_train =  (84, 3)
Shape of y_train =  (84,)
Shape of X_test =  (15, 3)
Shape of y_test =  (15,)


# feature Scalling

In [46]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(x_train)

In [47]:
x_train_sc=sc.transform(x_train)
x_test_sc=sc.transform(x_test)

# using LinearRegression

In [48]:
from sklearn.linear_model import LinearRegression
lr= LinearRegression()
lr.fit(x,y)

In [49]:
lr.coef_

array([-807926.60215054,   54801.60215054,   19526.60215054])

In [50]:
lr.intercept_

1634602589.5483863

In [51]:
y_pred=(x_test)

In [52]:
lr.score(x_test,y_test)

0.7039574139974232

# using DecisionTreeRegressor

In [53]:
from sklearn.tree import DecisionTreeRegressor

In [54]:
regressor = DecisionTreeRegressor(criterion='squared_error')
regressor.fit(x_train,y_train)

In [55]:
y_test

19     150000
14     130760
43     110680
37     105380
66     237000
3      175000
80     510000
41    1650000
38      64500
68     309400
2       25500
1       30000
60     231500
53     199098
88     175000
Name: salary, dtype: int64

In [56]:
regressor.score(x_test,y_test)

0.09218722860433715

# using RandomForestRegressor


In [57]:
from sklearn.ensemble import RandomForestRegressor

In [58]:
Regressor=RandomForestRegressor(criterion="squared_error")
Regressor.fit(x_train,y_train)

In [59]:
y_test

19     150000
14     130760
43     110680
37     105380
66     237000
3      175000
80     510000
41    1650000
38      64500
68     309400
2       25500
1       30000
60     231500
53     199098
88     175000
Name: salary, dtype: int64

In [60]:
Regressor.score(x_test,y_test)

0.023187314887588517

# using svr

In [61]:
from sklearn.svm import SVR

In [62]:
from sklearn.linear_model import LinearRegression
lr= LinearRegression()
lr.fit(x_train,y_train)

In [63]:
svr_rbf=SVR(kernel='rbf')
svr_rbf.fit(x_train, y_train)
svr_rbf.score(x_test, y_test)

-0.0733470523407369

In [64]:
svr_linear=SVR(kernel='linear')
svr_linear.fit(x_train, y_train)
svr_linear.score(x_test, y_test)

-0.0733457270411002

In [65]:
svr_linear=SVR(kernel='poly')
svr_linear.fit(x_train, y_train)
svr_linear.score(x_test, y_test)

-0.07334705233187688

# using KNeighborsRegressor

In [66]:
from sklearn.neighbors import KNeighborsRegressor

In [80]:
regressor = KNeighborsRegressor(n_neighbors=3)
regressor.fit(x_train,y_train)

In [81]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()

In [82]:
sc.fit(x_train)

In [83]:
regressor.score(x_test,y_test)

-0.02310599168422467

In [71]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [72]:
print("MAE",mean_absolute_error(y_pred,x_test))

MAE 0.0


In [73]:
print("MSE",mean_squared_error(y_pred,x_test))

MSE 0.0


In [74]:
print("RMSE",np.sqrt(mean_squared_error(y_pred,x_test)))


RMSE 0.0


In [75]:
print("R_Squared",r2_score(y_pred,x_test))

R_Squared 1.0
