In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import calendar
# Make "plotly_dark" the default template for plotly figures created in this notebook.
pio.templates.default = "plotly_dark"
# Use seaborn's 'darkgrid' style for seaborn/matplotlib figures.
sns.set_style('darkgrid')

# Seed NumPy's legacy global random state so any np.random draws are repeatable.
np.random.seed(42)

In [88]:
# Load the raw generation data and keep only the date, region and thermal columns.
df = pd.read_csv("file_02.csv")
df = df.drop(
    columns=[
        'index',
        'Nuclear Generation Actual (in MU)',
        'Nuclear Generation Estimated (in MU)',
        'Hydro Generation Actual (in MU)',
        'Hydro Generation Estimated (in MU)',
    ]
)
df['Date'] = pd.to_datetime(df['Date'])

# The thermal columns are read as text (e.g. "1,106.89"), so strip the thousands
# separators and any stray spaces before converting to float.
# Both removals must go through the `.str` accessor: a plain `Series.replace(' ', '')`
# only matches cells whose *entire* value is ' ' and never removes embedded spaces.
for thermal_column in (
    'Thermal Generation Actual (in MU)',
    'Thermal Generation Estimated (in MU)',
):
    cleaned = (
        df[thermal_column]
        .str.replace(',', '', regex=False)
        .str.replace(' ', '', regex=False)
    )
    # A cell that held only separators/spaces is now '', which float() rejects;
    # treat it as a missing value instead of aborting the whole load.
    df[thermal_column] = cleaned.replace('', np.nan).astype(float)

In [89]:
# Preview the first rows of the generation frame after the cleaning step above.
df.head()

Unnamed: 0,Date,Region,Thermal Generation Actual (in MU),Thermal Generation Estimated (in MU)
0,2017-09-01,Northern,624.23,484.21
1,2017-09-01,Western,1106.89,1024.33
2,2017-09-01,Southern,576.66,578.55
3,2017-09-01,Eastern,441.02,429.39
4,2017-09-01,NorthEastern,29.11,15.91


In [91]:
# Load the state -> region lookup table, force the two numeric columns to float
# and shorten the verbose state column header to 'State/UT'.
state_df = (
    pd.read_csv("State_Region_corrected.csv")
    .astype({'Area (km2)': float, 'National Share (%)': float})
    .rename(columns={'State / Union territory (UT)': 'State/UT'})
)

In [92]:
# Preview the first rows of the state/region lookup table.
state_df.head()

Unnamed: 0,State/UT,Area (km2),Region,National Share (%)
0,Rajasthan,342239.0,Northern,10.55
1,Madhya Pradesh,308350.0,Central,9.37
2,Maharashtra,307713.0,Western,9.36
3,Uttar Pradesh,240928.0,Northern,7.33
4,Gujarat,196024.0,Western,5.96


In [93]:
# Daily totals: for each date, sum the estimated and actual thermal generation
# over all rows sharing that date, then persist the resulting time series.
dict_features = dict.fromkeys(
    (
        "Thermal Generation Estimated (in MU)",
        "Thermal Generation Actual (in MU)",
    ),
    "sum",
)
temp = df.groupby('Date').agg(dict_features)
temp.to_csv("thermal_time_series_overall.csv")

In [64]:
# Add up the 'National Share (%)' of every state/UT within a region, rank the
# regions from largest to smallest total, and export the ranking.
share_by_region = state_df['National Share (%)'].groupby(state_df['Region']).sum()
state = share_by_region.sort_values(ascending=False)
state.to_csv('Distribution of power region wise.csv')

In [65]:
# Total estimated and actual thermal generation per region over all dates.
thermal_sums = {
    "Thermal Generation Estimated (in MU)": "sum",
    "Thermal Generation Actual (in MU)": "sum",
}
temp = df.groupby("Region").agg(thermal_sums)
# Turn 'Region' back into an ordinary column; to_csv then also writes the new
# integer index as an unnamed leading column.
temp = temp.reset_index()
temp.to_csv('Thermal Generation by region.csv')

In [96]:
# Add a 'Month' column: first the month number taken from 'Date', then its
# abbreviated name looked up in calendar.month_abbr.
df['Month'] = df['Date'].dt.month
df['Month'] = df['Month'].map(lambda month_number: calendar.month_abbr[int(month_number)])

In [97]:
# --- Total land area per region -------------------------------------------
# One row per region with the summed area of its states/UTs.
region_areas = (
    state_df[['Area (km2)', 'Region']]
    .assign(Total_Area=lambda frame: frame.groupby('Region')['Area (km2)'].transform('sum'))
    .drop(columns='Area (km2)')
    .drop_duplicates()
)
# 'Central' is excluded from the comparison -- presumably because the generation
# data has no 'Central' region; TODO confirm against file_02.csv.
region_areas = region_areas[region_areas['Region'] != 'Central'].reset_index(drop=True)

# --- Actual thermal generation per region for 2017 -------------------------
data = df.loc[df['Date'].dt.year == 2017, ['Region', 'Thermal Generation Actual (in MU)']].copy()
data['Thermal'] = data.groupby('Region')['Thermal Generation Actual (in MU)'].transform('sum')
data = (
    data
    .drop(columns='Thermal Generation Actual (in MU)')
    .drop_duplicates()
    .reset_index(drop=True)
)

# --- Thermal generation per unit area --------------------------------------
# Attach each region's area by matching on the region NAME. Assigning
# region_areas['Total_Area'] directly would pair rows by position, which is only
# right when both frames happen to list the regions in the same order.
area_by_region = region_areas.set_index('Region')['Total_Area']
data['Area'] = data['Region'].map(area_by_region)
data['Thermal per area'] = data['Thermal'] / data['Area']