<a href="https://colab.research.google.com/github/shroukdiaa/colab_notebook/blob/main/task1_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1- Import libraries**

In [None]:
import pandas as pd  # pandas for data manipulation.
import numpy as np  # numpy for numerical computations.
import matplotlib.pyplot as plt  # matplotlib or seaborn for visualization.
from sklearn.linear_model import LinearRegression
# scikit-learn for building and evaluating the regression model.

# **2- Load the Dataset**

In [None]:
# Read the per-capita-income CSV into a DataFrame.
# NOTE(review): the '/content/' prefix looks Colab-specific; adjust the path
# when running outside Colab.
data = pd.read_csv(filepath_or_buffer='/content/canada_per_capita_income.csv')

# Preview the first five rows to confirm the file parsed as expected.
print(data.head(n=5))


In [None]:
# Show the column labels of the loaded DataFrame; later cells select the
# 'year' column by name and the income column by position (index 1).
print(data.columns)

# **3-  Visualize the Data**

In [None]:
# Scatter plot of the raw observations: 'year' on the x-axis and the second
# column of the DataFrame (labelled below as per capita income) on the y-axis.
plt.scatter(x=data['year'], y=data[data.columns[1]])
plt.title('Canada Per Capita Income Over the Years')
plt.xlabel('Year')
plt.ylabel('Per Capita Income')
plt.show()


# **4. Prepare the Data for the Model**

In [None]:
# Feature matrix X: the 'year' column, kept two-dimensional (a one-column
# DataFrame) because the estimator is fitted on a 2-D input.
X = data.loc[:, ['year']]

# Target y: the second column of the DataFrame (per capita income), selected
# by position so the exact header text does not matter.
y = data.iloc[:, 1]


# **5. Build the Linear Regression Model**

In [None]:
# Create a linear regression model
# (scikit-learn's LinearRegression with its default settings).
model = LinearRegression()

# Train the model using the dataset
# X is the one-column 'year' DataFrame and y the income column prepared above;
# fit() stores the learned coefficients on `model` itself.
model.fit(X, y)


# **6. Predict for the Year 2020**

In [None]:
# Predict the per capita income for 2020.
# The model was fitted on a DataFrame, so it remembers the feature name(s).
# Querying it with a bare nested list ([[2020]]) makes scikit-learn warn that
# "X does not have valid feature names"; passing a one-row DataFrame with the
# same column label(s) as X keeps training and prediction inputs consistent.
income_2020 = model.predict(pd.DataFrame([[2020]], columns=X.columns))
print(f"Predicted per capita income for 2020: {income_2020[0]}")


# **7. Plot the Regression Line**

In [None]:
# Observed values (blue dots).
plt.scatter(X, y, color='blue')

# Fitted line (red): the model's predictions for the same years it was
# trained on.
plt.plot(X, model.predict(X), color='red')

plt.title('Linear Regression for Canada Per Capita Income')
plt.xlabel('Year')
plt.ylabel('Per Capita Income')
plt.show()


# **8. Evaluate the Model (Optional)**

In [None]:
# Coefficient of determination (R²) of the fitted model.
# It is computed on the same X and y the model was trained on, so it describes
# goodness of fit rather than accuracy on unseen years.
r_squared = model.score(X=X, y=y)
print(f"R² score: {r_squared}")


# **Summary of Steps:**

---
1.   Import necessary libraries.
2.   Load and inspect the dataset.
3.  Visualize the data.
4. Prepare the data by splitting it into X and y.
5. Train a linear regression model.
6. Predict the value for 2020.
7.  Plot the regression line.
8. (Optional) Evaluate the model's performance.








# **Predict income for 2020 with gradient descent: data and parameters**

In [None]:
# Toy training data for the from-scratch gradient-descent regression.
years = np.array([1960, 1970, 1980, 1990, 2000, 2010])
income = np.array([400, 500, 600, 700, 800, 900])  # Example values, replace with real dataset

# Initialize parameters
theta_0 = 0.0  # intercept
theta_1 = 0.0  # slope (with respect to the normalised year)

# Learning rate.
# The years are later rescaled to roughly [-0.5, 0.5], which makes the slope
# direction of the cost surface very flat (mean of x**2 is about 0.12 here).
# With alpha = 0.01 the slope error only shrinks by a factor of about 0.31 in
# 1000 iterations, so the fit stops well short of the least-squares line.
# alpha = 0.5 is still stable for this scaling (both curvatures are <= 1) and
# converges comfortably within the same iteration budget.
alpha = 0.5
iterations = 1000
m = len(years)  # number of training examples

In [None]:
# Mean normalisation of the feature: subtract the mean year and divide by the
# span (max - min), so the normalised years are centred on 0 and cover a total
# width of 1. mean_year and range_year are kept so that other years can be
# mapped onto the same scale.
mean_year = years.mean()
range_year = years.max() - years.min()
years_normalized = (years - mean_year) / range_year


# **Hypothesis function**

In [None]:
def hypothesis(x):
    """Evaluate the current linear model at ``x``.

    Reads the module-level parameters ``theta_0`` (intercept) and ``theta_1``
    (slope) at call time, so the result reflects whatever values they hold
    at that moment. ``x`` may be a scalar or a NumPy array.
    """
    return theta_1 * x + theta_0

# Cost function
def compute_cost():
    """Return half the mean squared error of the current fit.

    Uses the module-level training data (``years_normalized``, ``income``),
    the example count ``m`` and the current parameters via ``hypothesis``.
    """
    residuals = hypothesis(years_normalized) - income
    squared_error_sum = np.sum(residuals ** 2)
    return (1 / (2 * m)) * squared_error_sum

# Gradient Descent
def gradient_descent():
    """Run batch gradient descent and return the cost after every iteration.

    Updates the module-level ``theta_0`` and ``theta_1`` in place for
    ``iterations`` steps with learning rate ``alpha``. Calling it again
    continues from the current parameter values rather than restarting.

    Returns:
        list: the value of ``compute_cost()`` after each update step.
    """
    global theta_0, theta_1
    costs = []
    step_scale = alpha * (1 / m)

    for _ in range(iterations):
        # Residuals are computed once per step, before either parameter
        # changes, so both parameters are updated from the same predictions.
        residuals = hypothesis(years_normalized) - income
        theta_0 = theta_0 - step_scale * np.sum(residuals)
        theta_1 = theta_1 - step_scale * np.sum(residuals * years_normalized)

        # Record the cost with the freshly updated parameters.
        costs.append(compute_cost())

    return costs


In [None]:
# Train the model: gradient_descent() updates the global theta_0 / theta_1 and
# returns the per-iteration cost values, which are kept for plotting.
cost_history = gradient_descent()
# The parameters apply to the normalised year scale, not to raw calendar years.
print(f"theta_0 (intercept): {theta_0}, theta_1 (slope): {theta_1}")

# **Predict income for 2020**

In [None]:
year_2020 = 2020
# theta_0 and theta_1 were learned on mean-normalised years, so the query year
# has to go through the same transformation before it is fed to the model.
# Plugging in the raw value 2020 would multiply the slope by about 2000 instead
# of a number of order 1 and give a prediction that is off by orders of
# magnitude.
year_2020_normalized = (year_2020 - mean_year) / range_year
predicted_income_2020 = theta_0 + theta_1 * year_2020_normalized
print(f"Predicted income for 2020: {predicted_income_2020}")

# **Plot the cost function history**

In [None]:
# Learning curve: cost recorded after each gradient-descent iteration.
# A curve that is still clearly falling at the right edge means the run has
# not converged yet.
plt.plot(cost_history)
plt.title("Cost Function during Gradient Descent")
plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.show()

# **Plot the regression line with the data points**

In [None]:
# Observations, plotted against the original (un-normalised) years.
plt.scatter(years, income, label='Actual data', color='red')

# Fitted values: the model is evaluated on the normalised years it was trained
# on, then drawn against the original years so the x-axis stays readable.
predicted_income = hypothesis(years_normalized)  # Predicted income
plt.plot(years, predicted_income, color='blue', label='Linear regression')

plt.title("Linear Regression Fit")
plt.xlabel("Year")
plt.ylabel("Per Capita Income")
plt.legend()
plt.show()