# *Importing the libraries*

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# *Loading the data*

In [None]:
# Raw string: the Windows path contains backslash sequences (\P, \D, \s) that
# are invalid escapes in a normal string literal. Python currently keeps them
# literally but warns about it, and this is slated to become an error.
DATA_PATH = r"E:\Programming\Python for Data Science and Machine Learning\Datasets\salary_dataset.csv"

# Keep every row, drop the first column (presumably an exported index column;
# TODO confirm against the CSV), and shorten the feature name.
# Built as one non-mutating chain so the cell can be re-run safely.
df = (
    pd.read_csv(DATA_PATH)
    .iloc[:, 1:]
    .rename(columns={'YearsExperience': 'Experience'})
)
df.head()

# *Data Visualization*

In [None]:
# Creating a 1 x 2 canvas: scatterplot on the left, heatmap on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Determining the relationship between the independent and dependent variable
# by plotting a scatterplot
sns.scatterplot(data=df, x='Experience', y='Salary', ax=axes[0])
axes[0].set_title("Experience vs Salary plot")

# Plotting the heatmap. The target axes is passed explicitly so the panel does
# not depend on whichever axes matplotlib currently treats as "current".
sns.heatmap(data=df.corr(), annot=True, cmap='coolwarm', ax=axes[1])
axes[1].set_title("Correlation Matrix")

# Observation: the correlation matrix shows a strong correlation (0.98) between
# the independent variable "Experience" and the dependent variable "Salary",
# hence we can predict the salary from the input feature experience with a
# model based on Linear Regression.
# (Kept as a comment: a bare string at the end of a cell is echoed as output.)
plt.show()

# *Selecting and training the model*

In [None]:
# Feature matrix (one-column DataFrame) and target Series
x = df[['Experience']]
y = df['Salary']

# Fixed seed so the 80/20 split, and therefore the fitted coefficients and the
# evaluation metrics, are identical on every Restart & Run All.
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# Fit ordinary least squares on the training portion only
model = LinearRegression()
model.fit(x_train, y_train)

# Single feature, so coef_ holds exactly one slope
slope = model.coef_[0]
intercept = model.intercept_
print(f"Model: Salary = {slope:.2f} * Experience + {intercept:.2f}")

# *Prediction and Evaluation*

In [None]:
# Predict salaries for the held-out rows
y_pred = model.predict(x_test)

# Score the predictions against the true held-out salaries
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"\nMean Squared Error (MSE): {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# All observed points, with the model's predictions on the test rows drawn
# over them as a line
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=df, x='Experience', y='Salary', label='Actual', ax=ax)
ax.plot(x_test, y_pred, color='red', label='Regression Line')
ax.set_xlabel("Years of Experience")
ax.set_ylabel("Salary")
ax.set_title("Prediction vs Actual")
ax.legend()
plt.show()