In [20]:
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression as lr
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sbn

In [21]:
# Switch on seaborn's default theme so the matplotlib figures produced below
# pick up its styling.
sbn.set()

In [22]:
# Load the sales table. The path is relative, so the CSV must sit in the
# notebook's working directory. `plot_sales` reads this frame as a global.
cars_data = pd.read_csv('cars_2000_2024.csv')

In [23]:
# Preview the first rows to confirm the column names that `plot_sales`
# relies on: 'Year', 'Car brand' and 'Number of Cars sold'.
print(cars_data.head())

   Year Car brand  Number of Cars sold
0  2000    toyota                40563
1  2000       bmw                28433
2  2000     honda                43184
3  2000      ford                82596
4  2000    nissan                57383


In [24]:
def plot_sales(car_brand, target_year=2025):
    """Fit a linear sales trend for one brand, save a chart, and print the forecast.

    Rows of the module-level ``cars_data`` frame whose 'Car brand' matches
    ``car_brand`` (case-insensitively) are ordered by 'Year' and used to fit a
    linear regression of 'Number of Cars sold' on 'Year'. The fitted line,
    the observed points and a dashed extrapolation out to ``target_year`` are
    drawn and written to ``output/<brand>_sales_and_prediction.png`` (the
    ``output`` directory is created when missing). The forecast, intercept,
    slope, R2 and RMSE of the in-sample fit are printed.

    Parameters
    ----------
    car_brand : str
        Brand name to look up; matching ignores letter case.
    target_year : int, default 2025
        Year to extrapolate the fitted line to. The default reproduces the
        original hard-coded 2025 forecast, labels and messages.

    Returns
    -------
    None
        Results are reported via ``print`` and the saved figure. If the brand
        is absent from ``cars_data`` a message is printed and nothing is saved.
    """
    brand_data = cars_data[cars_data['Car brand'].str.lower() == car_brand.lower()]
    if brand_data.empty:
        print(f"Brand '{car_brand}' not found in data.")
        return

    # Order the rows chronologically: the dashed extrapolation below starts at
    # the last row, which is only the most recent year if the data is sorted.
    brand_data = brand_data.sort_values('Year', kind='mergesort')

    # Double brackets keep both arrays 2-D (n_samples, 1), which is why the
    # intercept/coefficient/prediction are indexed with [0] / [0][0] below.
    x = brand_data[['Year']].values
    y = brand_data[['Number of Cars sold']].values
    model = lr()
    model.fit(x, y)
    y_pred = model.predict(x)
    forecast = model.predict(np.array([[target_year]]))[0][0]

    # Explicit figure/axes so exactly this figure is saved and closed.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(x, y, color='blue', label='Actual Sales')
    ax.plot(x, y_pred, color='red', label='Predicted Sales')
    # Dashed segment from the fitted value at the latest observed year to the forecast.
    ax.plot([x[-1][0], target_year], [y_pred[-1][0], forecast], 'g--',
            label=f'{target_year} Prediction')
    ax.set_xlabel('Year')
    ax.set_ylabel('Number of cars sold')
    ax.set_title(f'{car_brand.title()} Sales & {target_year} Prediction')
    ax.legend()
    os.makedirs('output', exist_ok=True)
    fig.savefig(f'output/{car_brand.lower()}_sales_and_prediction.png')
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    # int() truncates toward zero rather than rounding the forecast.
    print(f"{target_year} predicted sales for {car_brand.title()}: {int(forecast)}")
    print(f'Intercept: {model.intercept_[0]}')
    print(f'Coefficient: {model.coef_[0][0]}')
    # Both metrics are computed on the same rows the model was fitted on.
    print(f'R2 Score: {r2_score(y, y_pred)}')
    mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(mse)
    print(f'RMSE: {rmse}')

In [25]:
# Fit the linear trend for Toyota: the chart is written under output/ and the
# forecast plus fit metrics are printed below.
plot_sales('toyota')

2025 predicted sales for Toyota: 78869
Intercept: -3448764.95826087
Coefficient: 1742.041739130435
R2 Score: 0.19087748972194052
RMSE: 24827.422802213227


In [26]:
# Same analysis for Ford: the chart is written under output/ and the forecast
# plus fit metrics are printed below.
plot_sales('ford')

2025 predicted sales for Ford: 48532
Intercept: 189439.16652173884
Coefficient: -69.58347826086941
R2 Score: 0.00037492300426600345
RMSE: 24871.212626654407
