In [3]:
import pandas as pd
import numpy as np
import os
import csv
from scipy.stats import linregress

In [4]:
df = pd.read_csv(os.path.join('Data','csv','output','final_data.csv'))
df

FileNotFoundError: [Errno 2] No such file or directory: 'Data/csv/output/final_data.csv'

In [6]:
source_list = ['Energy Source Nuclear', 'Energy Source Coal, peat and oil shale', 'Energy Source Oil products', 'Energy Source Electricity', 'Energy Source Natural gas']
growth_df = pd.DataFrame(columns=['Year', 'Country', 'Energy Consumption per Capita'] + source_list)
rvalue_df = pd.DataFrame(columns=['Country', 'Source', 'R-Value', 'Strength', 'Direction'])

In [7]:
def calculate_growth(value, prev_value):
  if prev_value == 0:
    return np.nan
  else:
    growth = (value - prev_value) / prev_value * 100
    return growth

In [8]:
year_df_array = []

# For each unique year skipping the first year
for year in df['Year'].unique()[1:]:
  contry_df_array = []

  # For each unique country
  for country in df['Country'].unique():
    country_df = pd.DataFrame({ 'Country': country }, index=[0])

    # For each data column
    for column in ['Energy Consumption per Capita'] + source_list:
      # Get the value of the column for the given year and country
      value = df[(df['Year'] == year) & (df['Country'] == country)][column].values[0]

      # Get the value of the column for the previous year and country
      prev_value = df[(df['Year'] == year - 1) & (df['Country'] == country)][column].values[0]

      # Calculate the growth percentage
      growth = calculate_growth(value, prev_value)

      # Add the growth percentage to the country dataframe
      country_df[column] = growth

    contry_df_array.append(country_df)

  # Merge the country dataframes
  year_df = pd.concat(contry_df_array, ignore_index=True)

  # Add the year column
  year_df['Year'] = year

  # Reorder the columns
  year_df = year_df.reindex(columns=growth_df.columns)

  year_df_array.append(year_df)

# Merge the year dataframes
growth_df = pd.concat(year_df_array, ignore_index=True)
growth_df

Unnamed: 0,Year,Country,Energy Consumption per Capita,Energy Source Nuclear,"Energy Source Coal, peat and oil shale",Energy Source Oil products,Energy Source Electricity,Energy Source Natural gas
0,1991,Canada,-0.868233,,8.208955,-4.014599,0.930233,1.930502
1,1991,Mexico,0.860585,,-19.565217,5.137786,-0.831025,6.185567
2,1991,US,-1.493902,,-1.373980,-2.079838,5.294800,-0.859012
3,1991,Argentina,0.176678,,-5.555556,7.741935,4.109589,1.187648
4,1991,Brazil,1.604278,,18.181818,1.385165,3.162055,4.716981
...,...,...,...,...,...,...,...,...
1452,2021,Indonesia,1.071429,,3.891509,2.557148,3.830645,-1.991465
1453,2021,Japan,5.186267,,7.115629,-0.738606,2.006689,4.787234
1454,2021,New Zealand,-3.106682,,0.000000,5.058366,0.000000,-13.761468
1455,2021,Singapore,-0.612282,,0.000000,4.637097,5.464481,4.838710


In [9]:
# Get "Positive" or "Negative" based on the rvalue
def get_direction(value):
  if value > 0:
    return 'Positive'
  elif value < 0:
    return 'Negative'
  else:
    return np.nan

In [10]:
# Get "Weak", "Moderate" or "Strong" based on the rvalue
def get_strength(value):
  if abs(value) < 0.3:
    return 'Weak'
  elif abs(value) < 0.7:
    return 'Moderate'
  else:
    return 'Strong'

In [11]:
contry_df_array = []

# For each unique country in growth_df
for country in growth_df['Country'].unique():
  # Get the country data
  country_data_df = growth_df[growth_df['Country'] == country]

  source_df_array = []

  # For each column in source_list
  for column in source_list:
    # Get the rvalue per source
    rvalue = linregress(country_data_df['Energy Consumption per Capita'], country_data_df[column]).rvalue

    source_df = pd.DataFrame({ 'Country': country, 'Source': column, 'R-Value': rvalue, 'Direction': get_direction(rvalue), 'Strength': get_strength(rvalue) }, index=[0])
    source_df_array.append(source_df)

  country_df = pd.concat(source_df_array, ignore_index=True)

  contry_df_array.append(country_df)

rvalue_df = pd.concat(contry_df_array, ignore_index=True)
rvalue_df


Unnamed: 0,Country,Source,R-Value,Direction,Strength
0,Canada,Energy Source Nuclear,,,Strong
1,Canada,"Energy Source Coal, peat and oil shale",0.478987,Positive,Moderate
2,Canada,Energy Source Oil products,0.717849,Positive,Strong
3,Canada,Energy Source Electricity,0.374916,Positive,Moderate
4,Canada,Energy Source Natural gas,0.669038,Positive,Moderate
...,...,...,...,...,...
230,Thailand,Energy Source Nuclear,,,Strong
231,Thailand,"Energy Source Coal, peat and oil shale",0.500284,Positive,Moderate
232,Thailand,Energy Source Oil products,0.855777,Positive,Strong
233,Thailand,Energy Source Electricity,0.845621,Positive,Strong
