# Linear Regression

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# ---- Canada per-capita income: regress income on year, then predict 2020 ----
income_data = pd.read_csv('canada_per_capita_income.csv')
# Header names may carry stray whitespace; normalise them before selecting by name.
income_data.columns = income_data.columns.str.strip()
# Rows with any missing value are discarded rather than imputed.
income_data = income_data.dropna()

# The single feature is kept as a one-column DataFrame (2-D);
# the target is taken positionally as the second column of the file.
X_income = income_data.loc[:, ['year']]
y_income = income_data.iloc[:, 1]

# fit() returns the estimator, so construction and training can be chained.
model_income = LinearRegression().fit(X_income, y_income)

# The query frame reuses the training column name.
income_2020 = model_income.predict(pd.DataFrame({'year': [2020]}))
print("Predicted per capita income for Canada in 2020:", income_2020[0])

# ---- Salary vs. experience: regress Salary on YearsExperience, predict at 12 years ----
salary_data = pd.read_csv('salary.csv')
# Normalise header whitespace so the name-based selections below match.
salary_data.columns = salary_data.columns.str.strip()
# Incomplete rows are dropped, not imputed.
salary_data = salary_data.dropna()

# Feature stays 2-D (one-column DataFrame); target is a plain Series.
X_salary = salary_data.loc[:, ['YearsExperience']]
y_salary = salary_data.loc[:, 'Salary']

# fit() returns the estimator, so construction and training can be chained.
model_salary = LinearRegression().fit(X_salary, y_salary)

# The query frame reuses the training column name.
salary_12 = model_salary.predict(pd.DataFrame({'YearsExperience': [12]}))
print("Predicted salary for employee with 12 years of experience:", salary_12[0])


Predicted per capita income for Canada in 2020: 41288.69409441762
Predicted salary for employee with 12 years of experience: 139049.6749539778


# Multiple Regression

In [5]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# ---- Hiring data: salary ~ experience + test score + interview score ----
hiring_data = pd.read_csv('hiring.csv')
hiring_data.columns = hiring_data.columns.str.strip()

# Lookup for experience values written as English number words (zero..twelve).
word_to_num = {
    'zero':0,'one':1,'two':2,'three':3,'four':4,'five':5,
    'six':6,'seven':7,'eight':8,'nine':9,'ten':10,
    'eleven':11,'twelve':12
}

# Parse 'experience' in two passes:
#   1. number words -> integers via word_to_num, after trimming surrounding
#      whitespace and lower-casing, so that e.g. ' Five ' is recognised instead
#      of falling through to median imputation;
#   2. anything that is not a known word falls back to plain numeric parsing
#      of the original cell (unparseable cells become NaN).
experience_text = hiring_data['experience'].astype(str).str.strip().str.lower()
experience_from_words = experience_text.map(word_to_num)
experience_from_digits = pd.to_numeric(hiring_data['experience'], errors='coerce')
hiring_data['experience'] = experience_from_words.fillna(experience_from_digits)

# Remaining gaps in any numeric column (including unparsed experience values)
# are imputed with that column's median.
hiring_data = hiring_data.fillna(hiring_data.median(numeric_only=True))

X_hiring = hiring_data[['experience','test_score(out of 10)','interview_score(out of 10)']]
y_hiring = hiring_data['salary($)']

model_hiring = LinearRegression()
model_hiring.fit(X_hiring, y_hiring)

# Two candidates, one per row: (experience, test score, interview score).
# Column labels are taken from the training frame so they match what the model was fitted on.
pred_hiring = model_hiring.predict(pd.DataFrame([[2,9,6],[12,10,10]], columns=X_hiring.columns))
print("Predicted salaries:", pred_hiring)

# ---- Company profit: numeric spend features plus one-hot encoded State ----
companies_data = pd.read_csv('1000_Companies.csv')
companies_data.columns = companies_data.columns.str.strip()
# Rows with any missing value are discarded rather than imputed.
companies_data = companies_data.dropna()

spend_columns = ['R&D Spend','Administration','Marketing Spend']

# One indicator column per State level. drop_first=True drops the first level,
# which then acts as the baseline. dtype=float keeps the design matrix purely
# numeric instead of a float/bool mix.
state_dummies = pd.get_dummies(companies_data['State'], drop_first=True, dtype=float)
X_companies = pd.concat([companies_data[spend_columns], state_dummies], axis=1)
y_companies = companies_data['Profit']

model_companies = LinearRegression()
model_companies.fit(X_companies, y_companies)

# Build the query row against the training columns rather than a hard-coded
# [1, 0] literal: reindex guarantees the same columns in the same order as the
# fitted model and zero-fills every State indicator, whatever their number.
input_state = pd.DataFrame([[91694.48,515841.3,11931.24]], columns=spend_columns)
input_state = input_state.reindex(columns=X_companies.columns, fill_value=0.0)

# Select the state represented by the first retained indicator column
# (equivalent to the former [1, 0] when exactly two indicators exist).
# NOTE(review): confirm this column is the intended state for the query.
if len(state_dummies.columns) > 0:
    input_state[state_dummies.columns[0]] = 1.0

pred_profit = model_companies.predict(input_state)
print("Predicted profit:", pred_profit[0])


Predicted salaries: [47056.91056911 88227.64227642]
Predicted profit: 510570.9926108309
