# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fit a one-feature linear regression of Electric Range on Model Year, plot
# the fitted line, and report the coefficients and test-set error.
# Kept as a flat script so that df, X, y, model, y_pred, etc. remain
# module-level names for any code that runs after this section.

# Load dataset
file_path = "/content/Electric_Vehicle_Population_Data_1.csv"
df = pd.read_csv(file_path)

# Keep only the two columns the model uses; rows missing either value cannot
# be used for fitting and are dropped.
df = df[['Model Year', 'Electric Range']].dropna()

# Order rows by Model Year. A stable sort keeps rows that share a year in
# their file order, so the seeded split below always sees the same row order.
df = df.sort_values(by='Model Year', kind='mergesort')

# NOTE: 'Electric Range' is intentionally left as recorded. Overwriting it
# with a running total (cumsum) makes the target monotonically increasing by
# construction, so the slope and error would describe fabricated values rather
# than vehicle range. The fitted trend may therefore be flat or negative; that
# is a property of the data, not of this script.

# Splitting data into training and testing sets (80/20, fixed seed)
X = df[['Model Year']]  # 2-D feature frame, as scikit-learn estimators expect
y = df['Electric Range']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows; these predictions feed the error metric below
y_pred = model.predict(X_test)

# Sort the test rows by year so the plotted line is drawn left to right
X_test_sorted = X_test.sort_values(by='Model Year', kind='mergesort')
y_pred_sorted = model.predict(X_test_sorted)

# Plot regression line (without scatter plot)
plt.figure(figsize=(8, 6))
plt.plot(
    X_test_sorted['Model Year'],  # 1-D x values for a single line
    y_pred_sorted,
    color='red',
    label='Regression Line',
)
plt.xlabel('Model Year')
plt.ylabel('Electric Range')
plt.title('Linear Regression: Electric Range vs Model Year')
plt.legend()
plt.show()

# Print model coefficients and the test-set error
print(f"Slope: {model.coef_[0]}")
print(f"Intercept: {model.intercept_}")
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")