In [50]:
# Path to your CSV file
csv_file_path = '/Users/rayhanzirvi/Desktop/annual_generation_state.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# for col in df.columns:
#     print(col)

# Display the first few rows of the DataFrame
# print(df.head())

# Convert 'GENERATION (Megawatthours)' to numeric format, if it contains commas
df['GENERATION (Megawatthours)'] = df['GENERATION (Megawatthours)'].str.replace(',', '').astype(float)

# Filter for the year 2021
df_2021 = df[df['YEAR'] == 2021]

df_2021 = df_2021[df_2021['TYPE OF PRODUCER'] == 'Total Electric Power Industry']

# Define renewable energy sources based on your update
renewable_sources = [
    'Wind', 
    'Wood and Wood Derived Fuels', 
    'Other Biomass', 
    'Hydroelectric Conventional', 
    'Solar Thermal and Photovoltaic', 
    'Pumped Storage', 
    'Geothermal'
]

# Identify renewable energy generation
df_2021['Renewable'] = df_2021['ENERGY SOURCE'].isin(renewable_sources)

# Group by State and Energy Source, then sum the generation
state_energy_generation = df_2021.groupby(['STATE', 'ENERGY SOURCE', 'Renewable'])['GENERATION (Megawatthours)'].sum().reset_index()

# Calculate total and renewable generation by state
total_generation_by_state = df_2021.groupby('STATE')['GENERATION (Megawatthours)'].sum() / 2
renewable_generation_by_state = df_2021[df_2021['Renewable']].groupby('STATE')['GENERATION (Megawatthours)'].sum()

# Calculate the percentage of renewable generation for each state
percentage_renewable = (renewable_generation_by_state / total_generation_by_state * 100).reset_index(name='Percentage Renewable')

# Merge the percentage data back with the detailed state and energy source data
detailed_state_data = pd.merge(state_energy_generation, percentage_renewable, on='STATE')

# Now, print out the details for each state
for state in detailed_state_data['STATE'].unique():
    print(f"State: {state}")
    state_data = detailed_state_data[detailed_state_data['STATE'] == state]
    for _, row in state_data.iterrows():
        print(f"  Energy Source: {row['ENERGY SOURCE']}, Renewable: {row['Renewable']}, Generation (MWh): {row['GENERATION (Megawatthours)']}")
    state_percentage_renewable = state_data['Percentage Renewable'].iloc[0]
    print(f"  Percentage of Renewable Energy: {state_percentage_renewable:.2f}%")
    print("")


State: AK
  Energy Source: Coal, Renewable: False, Generation (MWh): 752894.0
  Energy Source: Hydroelectric Conventional, Renewable: True, Generation (MWh): 1688900.0
  Energy Source: Natural Gas, Renewable: False, Generation (MWh): 3085396.0
  Energy Source: Other, Renewable: False, Generation (MWh): -4141.0
  Energy Source: Other Biomass, Renewable: True, Generation (MWh): 38071.0
  Energy Source: Petroleum, Renewable: False, Generation (MWh): 902486.0
  Energy Source: Solar Thermal and Photovoltaic, Renewable: True, Generation (MWh): 0.0
  Energy Source: Total, Renewable: False, Generation (MWh): 6595818.0
  Energy Source: Wind, Renewable: True, Generation (MWh): 132212.0
  Energy Source: Wood and Wood Derived Fuels, Renewable: True, Generation (MWh): 0.0
  Percentage of Renewable Energy: 28.19%

State: AL
  Energy Source: Coal, Renewable: False, Generation (MWh): 26900909.0
  Energy Source: Hydroelectric Conventional, Renewable: True, Generation (MWh): 11520809.0
  Energy Source: 

In [48]:
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Sample data, this would be replaced with the user's actual data.
# State abbreviations should match the GeoDataFrame's key for state names or abbreviations.
data = {
    'State': ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE'],
    'Percentage Renewable': [28.19, 10.74, 8.89, 13.43, 41.05, 32.70, 3.43, 35.05, 3.15]
}
df_renewable = pd.DataFrame(data)

# Get the GeoDataFrame for US states
gdf_states = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
gdf_states = gdf_states[(gdf_states['name'] == "United States of America") &
                        (gdf_states['iso_a2'] == "US")]

# We will rename columns to match the sample data's 'State' column for easier merging.
gdf_states = gdf_states.rename(columns={'iso_3166_2': 'State'})
gdf_states['State'] = gdf_states['State'].str.replace('US-', '')

# Merge the GeoDataFrame with the renewable energy data
gdf_merged = gdf_states.merge(df_renewable, on='State', how='left')

# Plotting the data
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Plot the states with no data as grey
gdf_merged.plot(column='Percentage Renewable', ax=ax, legend=True,
                legend_kwds={'label': "Percentage of Renewable Energy"},
                cax=cax, missing_kwds={"color": "lightgrey"},
                cmap='viridis')

# Remove axis off
ax.set_axis_off()

# Set a title
ax.set_title('Percentage of Renewable Energy by State', fontdict={'fontsize': '15', 'fontweight' : '3'})

# Show the plot
plt.show()

KeyError: 'iso_a2'