### Importing the necessary libraries
- **pandas**: Used for data manipulation.
- **numpy**: Used for numerical computing and array operations.
- **matplotlib**: Used for data visualization.


In [2]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline

### create a variable to hold your dataset 

In [3]:
# Read the per-capita income CSV (path is relative to the notebook's working directory).
csv_path = 'canada_per_capita_income.csv'
dataset = pd.read_csv(csv_path)

In [5]:
# Preview the first rows only; head(60) exceeds the dataset length and dumps every row.
dataset.head(10)

Unnamed: 0,year,per capita income (US$)
0,1970,3399.299037
1,1971,3768.297935
2,1972,4251.175484
3,1973,4804.463248
4,1974,5576.514583
5,1975,5998.144346
6,1976,7062.131392
7,1977,7100.12617
8,1978,7247.967035
9,1979,7602.912681


### Visualize the data 

In [None]:
# Scatter the raw observations: one marker per year.
dataset.plot(x='year', y='per capita income (US$)', style='o')
# Axis labels must describe the plotted columns (year vs. per capita income).
plt.xlabel("Year")
plt.ylabel("Per Capita Income (US$)")
plt.title("Per Capita Income by Year")
plt.show()

 ### Dataset Separation
- **x**: contains the independent variable (the `year` column).
- **y**: contains the dependent variable (the per capita income column, which is the last column in the dataset).


In [7]:
# Select columns by name rather than by position, so the result does not depend on
# column order or on extra columns such as a saved index ("Unnamed: 0").
# Double brackets keep x two-dimensional (n_samples, 1), as scikit-learn estimators expect.
x = dataset[['year']].values
# Target vector: one per-capita income value per row.
y = dataset['per capita income (US$)'].values

[ 3399.299037  3768.297935  4251.175484  4804.463248  5576.514583
  5998.144346  7062.131392  7100.12617   7247.967035  7602.912681
  8355.96812   9434.390652  9619.438377 10416.53659  10790.32872
 11018.95585  11482.89153  12974.80662  15080.28345  16426.72548
 16838.6732   17266.09769  16412.08309  15875.58673  15755.82027
 16369.31725  16699.82668  17310.75775  16622.67187  17581.02414
 18987.38241  18601.39724  19232.17556  22739.42628  25719.14715
 29198.05569  32738.2629   36144.48122  37446.48609  32755.17682
 38420.52289  42334.71121  42665.25597  42676.46837  41039.8936
 35175.18898  34229.19363 ]


In [None]:
# print (x)

### Split the dataset into training and testing sets
- **x_train:** data for training the model.
- **y_train:** corresponding target data for training.
- **x_test:** data for testing the model's performance.
- **y_test:** corresponding target data for testing.
- **test_size=0.2 :** specifies 20% data for testing and 80% for training
- **random_state=0 :** sets the seed of the random number generator; using any fixed integer (here 0) makes the data split reproducible



In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
splits = train_test_split(x, y, test_size=0.2, random_state=0)
x_train, x_test, y_train, y_test = splits

### Fitting the training data using linear regression

In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares line to the training portion of the data.
reg = LinearRegression()
reg.fit(X=x_train, y=y_train)

### predicting the test data 

In [None]:
# Predict the target for the held-out test rows, then display the predictions.
y_pred = reg.predict(X=x_test)
y_pred

### Comparing the actual values with the predicted values


In [None]:
# Side-by-side table of the observed test targets and the model's predictions.
comparison_columns = {'Actual': y_test, 'Predicted': y_pred}
df = pd.DataFrame(comparison_columns)
df

### show accuracy of prediction

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination (R²) of the predictions on the held-out test set.
r_squared = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared (R²) score:", r_squared)


### R-squared score measures how well the linear regression model fits the data. Its best possible value is 1, and it can be negative when the model performs worse than always predicting the mean. A higher R-squared score indicates a better fit.

In [None]:
# Overlay the fitted regression line (red) on the observed data points (blue).
fig, ax = plt.subplots()
ax.scatter(x, y, color='blue', label='Actual Data')
ax.plot(x_test, y_pred, color='red', label='Linear Regression Line')
ax.set(
    xlabel='Year',
    ylabel='Per Capita Income (US$)',
    title='Linear Regression for Per Capita Income',
)
ax.legend()
plt.show()

### Accept the input from user for prediction

In [None]:

# Ask for the year to forecast; int() raises ValueError if the entry is not a whole number.
user_year = int(input("Enter the year: "))

# Wrap the single year as a 2-D array with one row and one column for predict().
user_input = np.array([user_year]).reshape(1, 1)

# Run the fitted model on the requested year.
predicted_income = reg.predict(user_input)

# Report the forecast, formatted to two decimals.
print(f"Predicted per capita income for the year {user_year}: ${predicted_income[0]:.2f}")

### Data visualization after accepting data from user


In [None]:
# Visualize the data, linear regression line, and predicted data point
fig, ax = plt.subplots()
ax.scatter(x, y, color='blue', label='Actual Data')
ax.plot(x_test, y_pred, color='red', label='Linear Regression Line')
# Distinct colour, marker and z-order so the prediction is not hidden by the red line.
ax.scatter(user_year, predicted_income[0], color='green', marker='*', s=150,
           zorder=3, label='Predicted Data Point')

# Express the R-squared (R²) score as a percentage
accuracy_percentage = r_squared * 100

# Anchor the annotations in axes-fraction coordinates (0-1) instead of fixed data
# coordinates, so they stay inside the plot and clear of the data for any year entered.
ax.text(0.03, 0.95, f'R² score: {accuracy_percentage:.2f}%',
        transform=ax.transAxes, va='top', fontsize=12, color='green')
ax.text(0.03, 0.87, f'Predicted income for  {user_year}: ${predicted_income[0]:.2f}',
        transform=ax.transAxes, va='top', fontsize=12, color='green')

ax.set_xlabel('Year')
ax.set_ylabel('Per Capita Income (US$)')
ax.set_title('Linear Regression for Per Capita Income')
# Lower right is free of data points and of the annotations placed at the upper left.
ax.legend(loc='lower right')
plt.show()
