# **Multiple Regression**

In [None]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt 
import seaborn as sns

In [None]:
# Dataset

# Six-row toy table: two predictor columns and the 'Salary' response.
data = pd.DataFrame(
    {
        'Years of Experience': [1, 2, 3, 4, 5, 6],
        'Education Level': [1, 1, 2, 2, 3, 3],
        'Salary': [30, 35, 50, 55, 70, 75],
    }
)

# Response vector first, then the two-column predictor frame.
y = data['Salary']
x = data.loc[:, ['Years of Experience', 'Education Level']]

In [None]:
# Intercept

# Prepend a 'const' column of ones so the regression estimates an intercept.
# The keyword values below are add_constant's defaults, spelled out: the
# column goes first, and nothing is added if a constant column already exists.
x = sm.add_constant(x, prepend=True, has_constant='skip')

In [None]:
# Model

# Ordinary least squares of y on the design matrix x (intercept column
# included), followed by the full regression report.
model = sm.OLS(endog=y, exog=x).fit()
report = model.summary()
print(report)

In [None]:
# Predict

# Single observation to score. The labels must be spelled exactly like the
# training columns ('Years of Experience', lower-case "of"): predict() on a
# model fitted without a formula reads the frame by position, not by name,
# so a misspelled label would otherwise go unnoticed.
new_data = pd.DataFrame({
    'const': [1],
    'Years of Experience': [6],
    'Education Level': [3],
})

# Put the columns in the fitted model's parameter order. This raises a
# KeyError if a label does not match any of the model's regressors.
new_data = new_data[model.params.index]

predicted_salary = model.predict(new_data)
print(f"Predicted Salary: {predicted_salary[0]:.3f}")

In [None]:
# Dataset

# Read the universities CSV and keep only the rows that have no missing
# value in any column.
df = pd.read_csv(
    'NorthAmericaUniversities.csv',
    sep=",",
    encoding="ISO-8859-1",
).dropna()

In [None]:
# Numeric Conversion

def numeric_conversion(x):
    """Convert a currency-style value such as "$1.5B" or "12,300" to a float.

    For strings, "$" signs and "," separators are removed first. If the
    remaining text contains a magnitude letter -- B, M or K, in either
    case -- the letter is removed and the number is scaled by 1e9, 1e6 or
    1e3 respectively. B is checked before M, and M before K. Non-string
    values are passed straight to float().

    Raises:
        ValueError: if the remaining text is not a valid number.
    """
    if not isinstance(x, str):
        return float(x)

    # Upper-casing makes the suffix check case-insensitive; float() accepts
    # upper-case exponents and special values ("1E5", "NAN", "INF") too.
    text = x.replace("$", "").replace(",", "").upper()
    for letter, factor in (("B", 1e9), ("M", 1e6), ("K", 1e3)):
        if letter in text:
            return float(text.replace(letter, "")) * factor
    return float(text)

# Replace the two currency-formatted columns with their float equivalents.
for column in ("Minimum Tuition cost", "Endowment"):
    df[column] = df[column].apply(numeric_conversion)

In [None]:
# Heatmap

# Annotated heatmap of the pairwise correlations between df's numeric columns.
plt.figure(figsize=(8, 5))
corr = df.corr(numeric_only=True)
sns.heatmap(data=corr, annot=True, cmap='coolwarm')
plt.show()

In [None]:
# Model

# Regress 'Minimum Tuition cost' on 'Academic Staff' and 'Endowment', with
# an intercept column added to the predictors, then print the report.
y = df['Minimum Tuition cost']
x = sm.add_constant(df[['Academic Staff', 'Endowment']])
model = sm.OLS(endog=y, exog=x).fit()
print(model.summary())

In [None]:
# Predict

# One-row frame to score; the columns are listed in the order const,
# 'Academic Staff', 'Endowment'.
new_data = pd.DataFrame(
    {'const': 1, 'Academic Staff': 8189, 'Endowment': 1.92e10},
    index=[0],
)

# Predict tuition
predicted_tuition = model.predict(new_data)
print(f"Predicted Tuition: ${predicted_tuition[0]:,.2f}")