In [25]:
import pandas as pd
from haversine import haversine, Unit
import time
import numpy as np
import warnings

warnings.simplefilter("ignore")

In [26]:
# Read the bank-rate change history; the displayed frame has one row per
# change, with a 'Date Changed' and a 'Rate' column.
bank_rate_csv = '../data/Business_economy/bank_rate.csv'
bank = pd.read_csv(bank_rate_csv)
bank

Unnamed: 0,Date Changed,Rate
0,11/05/2023,4.50
1,23/03/2023,4.25
2,02/02/2023,4.00
3,15/12/2022,3.50
4,03/11/2022,3.00
...,...,...
64,06/06/1996,5.69
65,08/03/1996,5.94
66,18/01/1996,6.13
67,13/12/1995,6.38


In [30]:
# Convert 'Date Changed' to datetime and extract the year.
# The source dates are day-first (DD/MM/YYYY, e.g. 23/03/2023), so the format
# is given explicitly. Without it pandas guesses month-first from the first row
# (11/05/2023) and, depending on the pandas version, either mis-parses the
# ambiguous rows or raises on rows whose day is greater than 12.
bank['Date Changed'] = pd.to_datetime(bank['Date Changed'], format='%d/%m/%Y')
bank['Year'] = bank['Date Changed'].dt.year
bank

In [31]:
# Mean of the rates set in each calendar year that had at least one change
yearly_mean = bank.groupby('Year')['Rate'].mean().reset_index()

# One row per calendar year from the first to the last year present in the data
all_years = pd.DataFrame({'Year': range(yearly_mean['Year'].min(), yearly_mean['Year'].max() + 1)})

# Left-join so years without any rate change appear as NaN, then forward-fill
# them from the previous year. `.ffill()` replaces the deprecated
# `fillna(method='ffill', inplace=True)` and leaves no in-place mutation behind.
# NOTE(review): a gap year inherits the previous year's *mean*, not the last
# rate actually in force at the end of that year — confirm this is intended.
annual_rates = all_years.merge(yearly_mean, on='Year', how='left').ffill()
annual_rates

Unnamed: 0,Year,Rate
0,1995,6.505
1,1996,5.925
2,1997,6.75
3,1998,6.9375
4,1999,5.416667
5,2000,5.875
6,2001,4.964286
7,2002,4.964286
8,2003,3.666667
9,2004,4.375


In [32]:
# Destination for the per-year bank-rate table
output_file_path = '../data/Business_economy/bank_rate_filtered.csv'

# Persist annual_rates without the positional index column
annual_rates.to_csv(path_or_buf=output_file_path, index=False)

In [10]:
import pandas_gbq

# If a service account is required, assign its credentials object to
# pandas_gbq.context.credentials before uploading.

# Upload the per-year bank rates, replacing the destination table if it exists.
# pandas_gbq.to_gbq is called directly because DataFrame.to_gbq is deprecated
# in recent pandas releases.
pandas_gbq.to_gbq(
    annual_rates,
    'Banking_Dataset.Bank_rate',
    project_id='bold-circuit-389014',
    if_exists='replace',
)


100%|██████████| 1/1 [00:00<00:00, 8701.88it/s]


In [19]:
# Read the raw CPIH table
cpih_raw = pd.read_csv('../data/Business_economy/CPIH.csv')

# Keep rows from 1995 onwards, discard rows with any missing value, and
# cast 'Year' to int and 'Inflation rate' to float
inflation_data = (
    cpih_raw.loc[cpih_raw['Year'] >= 1995]
    .dropna()
    .astype({'Year': int, 'Inflation rate': float})
)

# Show the cleaned frame
inflation_data


Unnamed: 0,Year,Inflation rate
13,1995,2.7
14,1996,2.9
15,1997,2.2
16,1998,1.8
17,1999,1.7
18,2000,1.2
19,2001,1.6
20,2002,1.5
21,2003,1.4
22,2004,1.4


In [17]:
# Destination for the filtered CPIH table
output_file_path = '../data/Business_economy/CPIH_filtered.csv'

# Persist inflation_data without the positional index column
inflation_data.to_csv(path_or_buf=output_file_path, index=False)

In [23]:
# Plain-text preview of the first rows of the cleaned CPIH table
inflation_preview = inflation_data.head()
print(inflation_preview)

    Year  Inflation rate
13  1995             2.7
14  1996             2.9
15  1997             2.2
16  1998             1.8
17  1999             1.7


In [20]:
# Upload the cleaned CPIH table, replacing the destination table if it exists.
# Uses pandas_gbq.to_gbq (pandas_gbq is imported in the bank-rate upload cell
# above) because DataFrame.to_gbq is deprecated in recent pandas releases.
pandas_gbq.to_gbq(
    inflation_data,
    'Banking_Dataset.CPIH',
    project_id='bold-circuit-389014',
    if_exists='replace',
)


100%|██████████| 1/1 [00:00<00:00, 18477.11it/s]
