In [1]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the per-source generation CSV; the first column becomes the index
# and is parsed as dates
generation_df = pd.read_csv(
    'Data/energy_generation_data.csv',
    parse_dates=True,
    index_col=0,
)

generation_df.head()

Unnamed: 0_level_0,coal,natural gas,nuclear,petroleum,other,solar,hydro,wind
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-12-31,1870097,4270620,2245684,7293,221858,305983,632266,1074159
2023-12-30,1964710,4369981,2215492,7661,224151,322528,628762,1064144
2023-12-29,1948705,4482785,2228321,8311,216563,312185,718066,1109628
2023-12-28,1776641,4480968,2224448,9639,209483,309365,701614,1119395
2023-12-27,1679634,4397551,2215389,8236,218310,262265,723729,1093226


In [4]:
# Load the demand CSV; the first column becomes the index and is parsed as dates
demand_df = pd.read_csv(
    'Data/energy_demand_data.csv',
    parse_dates=True,
    index_col=0,
)

demand_df.head()

Unnamed: 0,total_demand
2023-12-31,10674523
2023-12-30,10821410
2023-12-29,11084647
2023-12-28,10907167
2023-12-27,10674973


In [5]:
# Sum the per-source generation columns of each row into a row total.
# Any existing 'total_generated' column is excluded from the sum so that
# re-running this cell does not fold the previous total back in (which
# would double the value on every re-execution).
source_generation = generation_df.drop(columns='total_generated', errors='ignore')
generation_df['total_generated'] = source_generation.sum(axis=1)

generation_df['total_generated'].head()

date
2023-12-31    10627960
2023-12-30    10797429
2023-12-29    11024564
2023-12-28    10831553
2023-12-27    10598340
Name: total_generated, dtype: int64

In [8]:
# Left-join the demand frame onto the generation frame, matching rows by index
energy_df = pd.merge(
    generation_df,
    demand_df,
    how='left',
    left_index=True,
    right_index=True,
)

energy_df.head()

Unnamed: 0_level_0,coal,natural gas,nuclear,petroleum,other,solar,hydro,wind,total_generated,total_demand
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-12-31,1870097,4270620,2245684,7293,221858,305983,632266,1074159,10627960,10674523
2023-12-30,1964710,4369981,2215492,7661,224151,322528,628762,1064144,10797429,10821410
2023-12-29,1948705,4482785,2228321,8311,216563,312185,718066,1109628,11024564,11084647
2023-12-28,1776641,4480968,2224448,9639,209483,309365,701614,1119395,10831553,10907167
2023-12-27,1679634,4397551,2215389,8236,218310,262265,723729,1093226,10598340,10674973


In [9]:
# Count missing values in each column of the combined frame
null_counts = energy_df.isna().sum()
null_counts

coal               0
natural gas        0
nuclear            0
petroleum          0
other              0
solar              0
hydro              0
wind               0
total_generated    0
total_demand       0
dtype: int64

In [10]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# combined frame
summary_stats = energy_df.describe()
summary_stats

Unnamed: 0,coal,natural gas,nuclear,petroleum,other,solar,hydro,wind,total_generated,total_demand
count,1826.0,1826.0,1826.0,1826.0,1826.0,1826.0,1826.0,1826.0,1826.0,1826.0
mean,2271783.0,4156591.0,2165472.0,22896.248083,207222.064622,283472.796824,736404.0,1021341.0,10865180.0,11031570.0
std,563721.2,855936.2,162485.8,17994.892259,36432.08553,117613.939261,121239.1,351829.4,1301253.0,1304928.0
min,1136578.0,2362626.0,1749670.0,2959.0,132441.0,46755.0,403323.0,290078.0,8238613.0,8404965.0
25%,1838388.0,3497317.0,2056667.0,12411.0,183975.75,194812.25,657441.8,758696.0,9893677.0,10028400.0
50%,2186894.0,4001372.0,2197422.0,22649.5,203175.0,261994.0,733928.0,992305.0,10581300.0,10761940.0
75%,2701618.0,4698168.0,2272156.0,27260.75,225769.5,358195.25,814905.0,1241677.0,11769070.0,11941910.0
max,3962782.0,6658612.0,2509082.0,427809.0,672637.0,587923.0,1122145.0,2076086.0,14699910.0,14829750.0


In [11]:
# Show the rows where total_generated is at least total_demand
met_demand = energy_df['total_generated'].ge(energy_df['total_demand'])
energy_df.loc[met_demand]

Unnamed: 0_level_0,coal,natural gas,nuclear,petroleum,other,solar,hydro,wind,total_generated,total_demand
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-12-14,1966272,4716258,2215313,10894,212605,287687,677762,1280292,11367083,11343308
2023-12-13,2142857,4826811,2214804,10446,207415,255117,693142,1112513,11463105,11420225
2023-12-12,2122030,4857500,2228112,11894,204378,316635,690244,1126899,11557692,11538087
2023-12-11,2115498,5048076,2212048,9133,201834,337969,658876,954481,11537915,11492941
2023-12-10,1758564,4241941,2213835,8274,209189,277478,538059,1135502,10382842,10340024
2023-12-09,1425607,3544286,2220954,8007,204384,302981,539085,1908785,10154089,10077066
2023-12-08,1647986,3951326,2262376,9378,205692,289081,578368,1771446,10715653,10676745
2023-12-07,1756944,4142559,2274760,9019,218738,305698,581667,1963765,11253150,11209022
2023-12-06,1841018,4490969,2270156,9330,206133,345397,585871,1474400,11223274,11180718
2023-12-05,1999554,4834104,2252981,9369,212582,340115,546621,821742,11017068,10955827


In [15]:
# Count rows where generation met or exceeded demand (equality counts as
# surplus); every remaining row is reported as a deficit day
surplus_mask = energy_df['total_generated'] >= energy_df['total_demand']
more_generated = len(energy_df[surplus_mask])
deficit_days = len(energy_df) - more_generated

print(f'''
    Surplus Energy Days: {more_generated}
    Deficit Energy Days: {deficit_days}
''')


    Surplus Energy Days: 31
    Deficit Energy Days: 1795

