In [1]:
import pandas as pd

# Actual GDP Data

In [2]:
# Load the GDP table, parse REF_DATE as a year-month date, keep only the rows
# matching the GDP filter below, and expose them as (Date, Actual GDP).
gdp_actual = (
    pd.read_csv('36100104.csv')
    .assign(REF_DATE=lambda frame: pd.to_datetime(frame['REF_DATE'], format='%Y-%m'))
    .loc[
        # Row filter: all four conditions must hold.
        lambda frame: (
            (frame['Prices'] == 'Current prices')
            & (frame['Seasonal adjustment'] == 'Seasonally adjusted at annual rates')
            & (frame['Estimates'] == 'Gross domestic product at market prices')
            & (frame['UOM'] == 'Dollars')
        ),
        ['REF_DATE', 'VALUE'],
    ]
    .rename(columns={'REF_DATE': 'Date', 'VALUE': 'Actual GDP'})
    # Renumber rows from 0 after filtering.
    .reset_index(drop=True)
)

gdp_actual.head()

Unnamed: 0,Date,Actual GDP
0,1961-01-01,40600.0
1,1961-04-01,41496.0
2,1961-07-01,42604.0
3,1961-10-01,43404.0
4,1962-01-01,44624.0


# Actual Population Data

In [3]:
# Load the population table, parse REF_DATE as a year-month date, keep only the
# rows whose GEO is "Canada", and expose them as (Date, Actual Population).
# Built as one chain so no in-place rename is applied to a filtered slice.
pop_actual = (
    pd.read_csv('17100009.csv')
    .assign(REF_DATE=lambda frame: pd.to_datetime(frame['REF_DATE'], format='%Y-%m'))
    .loc[lambda frame: frame['GEO'] == "Canada", ['REF_DATE', 'VALUE']]
    .rename(columns={'REF_DATE': 'Date', 'VALUE': 'Actual Population'})
    # Filtering can leave gaps in the row labels; renumber from 0 so this frame
    # is indexed the same way as gdp_actual.
    .reset_index(drop=True)
)
pop_actual.head()


Unnamed: 0,Date,Actual Population
0,1946-01-01,12188000
1,1946-04-01,12241000
2,1946-07-01,12316000
3,1946-10-01,12393000
4,1947-01-01,12450000


# Forecast GDP Data

In [4]:
# Read the GDP forecast file, give its two columns descriptive names, and
# convert the Date column to datetimes.
gdp_forecast = (
    pd.read_csv('GDP_forecast.csv')
    .rename(columns={'Unnamed: 0': 'Date', 'y_pred': 'GDP Forecast'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
gdp_forecast.head()

Unnamed: 0,Date,GDP Forecast
0,2024-01-01,2963008.0
1,2024-04-01,2973635.0
2,2024-07-01,2984263.0
3,2024-10-01,2994890.0
4,2025-01-01,3005518.0


# Population Forecast Data

In [5]:
# Read the population forecast file, convert 'ds' to datetimes, rename the
# columns, and keep only the date and the AutoARIMA forecast.
pop_forecast = (
    pd.read_csv('canada_population_forecast.csv')
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
    .rename(columns={'ds': 'Date', 'AutoARIMA': 'Population Forecast'})
    .loc[:, ['Date', 'Population Forecast']]
)
pop_forecast.head()

Unnamed: 0,Date,Population Forecast
0,2024-04-01,41047440.0
1,2024-07-01,41391092.0
2,2024-10-01,41726428.0
3,2025-01-01,42031148.0
4,2025-04-01,42335964.0


In [6]:
# Print the column dtypes of both forecast frames, GDP first.
for forecast_frame in (gdp_forecast, pop_forecast):
    print(forecast_frame.dtypes)

Date            datetime64[ns]
GDP Forecast           float64
dtype: object
Date                   datetime64[ns]
Population Forecast           float64
dtype: object


# Consolidation

In [7]:
import pandas as pd

# Inputs: the dataframes gdp_actual, pop_actual, gdp_forecast and pop_forecast
# must already exist, each with a 'Date' column.

# Factor applied to GDP before dividing by population.
# NOTE(review): presumably the GDP figures are reported in millions of dollars,
# which is why they are scaled up here — confirm against the source tables.
GDP_UNIT_IN_DOLLARS = 1_000_000

# Outer joins keep every date that appears in any input. validate='one_to_one'
# makes pandas raise a MergeError if either side has duplicate dates, instead
# of silently multiplying rows.
actual_data_merged = pd.merge(
    gdp_actual, pop_actual, on='Date', how='outer', validate='one_to_one'
)
forecast_data_merged = pd.merge(
    gdp_forecast, pop_forecast, on='Date', how='outer', validate='one_to_one'
)
final_data = pd.merge(
    actual_data_merged, forecast_data_merged, on='Date', how='outer', validate='one_to_one'
)

# Per-capita values: scaled GDP divided by population. Dates where either input
# is missing stay NaN.
final_data['Actual GDP per capita'] = (
    final_data['Actual GDP'] * GDP_UNIT_IN_DOLLARS / final_data['Actual Population']
)
final_data['GDP per capita Forecast'] = (
    final_data['GDP Forecast'] * GDP_UNIT_IN_DOLLARS / final_data['Population Forecast']
)

# Chronological order; the row labels from the merge are kept as-is.
final_data = final_data.sort_values('Date')

# Check if the final dataframe is as expected
final_data.head()


Unnamed: 0,Date,Actual GDP,Actual Population,GDP Forecast,Population Forecast,Actual GDP per capita,GDP per capita Forecast
252,1946-01-01,,12188000.0,,,,
253,1946-04-01,,12241000.0,,,,
254,1946-07-01,,12316000.0,,,,
255,1946-10-01,,12393000.0,,,,
256,1947-01-01,,12450000.0,,,,


In [8]:
# Write the consolidated table to disk; index=False leaves the row labels out of the file.
final_data.to_csv(path_or_buf='final_data.csv', index=False)

In [9]:
import plotly.express as px

# Columns of final_data drawn as separate lines on the chart.
plotted_columns = [
    'Actual GDP',
    'Actual Population',
    'GDP Forecast',
    'Population Forecast',
    'Actual GDP per capita',
    'GDP per capita Forecast',
]

# Line chart of every listed column against Date.
fig = px.line(
    final_data,
    x='Date',
    y=plotted_columns,
    title='GDP, Population, and GDP per capita',
)

# Render the figure in the notebook.
fig.show()


In [10]:
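# Approximate GDP per capita forecast by year (presumably CAD, followed by a USD equivalent) -- TODO confirm units and source:
# 2030: 66K -- USD 47K
# 2040: 59K -- USD 42K
# 2050: 55K -- USD 39K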

In [11]:
import plotly.express as px

# NOTE(review): this cell draws the same figure as the earlier plotting cell.
# Expects the dataframe 'final_data' to exist.
series_names = ['Actual GDP', 'Actual Population', 'GDP Forecast', 'Population Forecast',
                'Actual GDP per capita', 'GDP per capita Forecast']

# One line per listed column, with Date on the x-axis.
fig = px.line(final_data, x='Date', y=series_names,
              title='GDP, Population, and GDP per capita')

fig.show()
