In [36]:
# Created by: Michael Cullen
# 08/10/2024

In [37]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import ipywidgets as widgets

In [None]:
# Read the price CSV; header=0 makes the file's first row the column names.
df = pd.read_csv('Average-prices-2024-06.csv', header=0)

# Sanity-check the load: preview the leading rows, then list the column labels.
for preview in (df.head(), df.columns):
    print(preview)

In [42]:
# Region whose rows are kept for the rest of the analysis.
area_of_interest = 'London'

# Keep only the rows for that region. The explicit .copy() makes df_area an
# independent frame rather than a slice of df, so it can be modified safely
# without pandas raising SettingWithCopyWarning.
df_area = df[df['Region_Name'] == area_of_interest].copy()

In [None]:
if df_area.empty:
    print(f"No data found for region: {area_of_interest}")
else:
    # Parse 'Date' and derive the calendar year. Rebinding df_area via .assign
    # builds a new frame instead of writing columns onto a filtered slice,
    # which avoids SettingWithCopyWarning and keeps this cell safe to re-run.
    parsed_dates = pd.to_datetime(df_area['Date'])
    df_area = df_area.assign(Date=parsed_dates, Years=parsed_dates.dt.year)

    # Thin the data by keeping every 2nd row.
    df_area_sampled = df_area.iloc[::2]

    # Feature matrix (X) is the year; target (y) is the average price.
    x = df_area_sampled[['Years']]
    y = df_area_sampled['Average_Price']

    # Hold out a third of the rows; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

    # Fit a straight line of price against year on the training rows.
    model = LinearRegression(fit_intercept=True)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_train)

    # Report fit quality on the held-out rows so the split is actually used.
    print(f"Held-out R^2: {model.score(X_test, y_test):.3f}")

    # train_test_split shuffles the rows, so order the points by year before
    # drawing the fitted line.
    train_years = X_train['Years'].to_numpy()
    year_order = train_years.argsort()

    fig, ax = plt.subplots()
    ax.scatter(train_years, y_train, alpha=0.5, label='Training data')
    ax.plot(train_years[year_order], y_pred[year_order], color='red', label='Linear fit')
    ax.set_xlabel('Date')
    ax.set_ylabel('Average Price (£)')
    ax.set_title(f'House Prices in {area_of_interest}')
    # Only the lower axis bounds are pinned; the upper bounds stay automatic.
    ax.set_xlim(left=1970)
    ax.set_ylim(bottom=3000)
    ax.grid(True)
    # The legend is built last, once both labelled artists exist.
    ax.legend()


In [None]:
# Show the fitted line's parameters: first coefficient (slope), then intercept.
fitted_params = (
    ("Model slope:    ", model.coef_[0]),
    ("Model intercept:", model.intercept_),
)
for label, value in fitted_params:
    print(label, value)