## 1) Load helper extractor class to process data for London only

In [6]:
from src.LondonExtractor import LondonFilter

In [7]:
# Configure the project's LondonFilter helper with an input directory and the
# CSV path the filtered result should be written to.
# NOTE(review): presumably it scans the raw files under input_dir — confirm in src/LondonExtractor.
filterer = LondonFilter(input_dir="./data/", output_path="./data/london.csv")
# Run the extraction; the cell output below reports where the filtered file was saved.
filterer.process()

Filtered London data saved to: ./data/london.csv


## 2) Read in London dataset

In [1]:
import pandas as pd

In [2]:
# Load the London-only CSV produced by the extraction step (same path as its output_path).
london_df = pd.read_csv("./data/london.csv")

In [3]:
# Parse the raw transfer_date column once; values that cannot be parsed
# become NaT because of errors='coerce'.
parsed_transfer_dates = pd.to_datetime(london_df['transfer_date'], errors='coerce')
london_df['transfer_date'] = parsed_transfer_dates

# Derive the calendar year of each transfer from the parsed dates
london_df['year'] = parsed_transfer_dates.dt.year


## 3) Exploratory Data Analysis

### 3.1) Transaction volume per month

In [None]:
import plotly.express as px

# Re-parse transfer_date so this cell can be run on its own; values that
# cannot be parsed become NaT because of errors='coerce'.
transfer_dates = pd.to_datetime(london_df['transfer_date'], errors='coerce')
london_df['transfer_date'] = transfer_dates

# Bucket each transaction into its calendar month, represented by a
# timestamp at the start of that month.
london_df['month_year'] = transfer_dates.dt.to_period('M').dt.to_timestamp()

# One row per month with the number of transactions recorded in it
monthly_counts = (
    london_df.groupby('month_year')
    .size()
    .reset_index(name='transaction_count')
)

# Line chart of the monthly totals
axis_labels = {
    'month_year': 'Month-Year',
    'transaction_count': 'Number of Transactions',
}
fig = px.line(
    monthly_counts,
    x='month_year',
    y='transaction_count',
    title='Monthly Transaction Volume in London',
    labels=axis_labels,
)

# Show ticks as e.g. "Jan 2020" and centre the title
fig.update_layout(xaxis={'tickformat': '%b %Y'}, title_x=0.5)

fig.show()


### 3.2) MoM % change in transactions

In [6]:
# Count transactions per month, keyed by the month-start timestamps stored in
# `month_year`. groupby().size() only returns months that have at least one
# row, so asfreq('MS') is used to insert any missing calendar month as an
# explicit 0. Without this, pct_change() would compare the two nearest months
# that happen to have data and still be labelled "month-over-month".
monthly_counts = (
    london_df.groupby('month_year')
    .size()
    .asfreq('MS', fill_value=0)
    .rename_axis('month_year')
    .reset_index(name='transaction_count')
)

# Calculate MoM % change. Growth measured from an empty month is undefined
# (division by zero yields inf), so those points are left blank rather than
# plotted as infinities.
monthly_counts['mom_percent_change'] = (
    monthly_counts['transaction_count']
    .pct_change()
    .mul(100)
    .replace(float('inf'), float('nan'))
)

# Plot MoM % change
fig = px.line(
    monthly_counts,
    x='month_year',
    y='mom_percent_change',
    title='Month-over-Month % Change in Transactions',
    labels={'month_year': 'Month-Year', 'mom_percent_change': 'MoM % Change'},
    markers=True
)

# Add a dashed zero reference line and format the month ticks / title
fig.add_hline(y=0, line_dash="dash", line_color="gray")
fig.update_layout(xaxis=dict(tickformat='%b %Y'), title_x=0.5)

fig.show()
