In [None]:
!pip install geopandas



In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

# PFAS table used by every plot below; the plotting helpers read its
# 'city', 'Latitude' and 'Longitude' columns plus the N_D_*_PER indicator columns.
pfas_stats = pd.read_csv('/preprocessing/PFAS_Data.csv')
# State boundary polygons drawn as the base map by plot_socio_df
# (the path suggests the TIGER/Line 2021 US state shapefile — TODO confirm).
states = gpd.read_file('/preprocessing/tl_2021/tl_2021_us_state.shp')

def prepare_df_agg(df, column):
    """Collapse ``df`` to one row per location, keeping the largest ``column`` value.

    Rows where ``column`` is missing are discarded, then the remaining rows are
    grouped by ``('city', 'Latitude', 'Longitude')`` and the maximum of
    ``column`` is taken within each group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'city', 'Latitude', 'Longitude' and ``column``.
        The input frame is not modified.
    column : str
        Name of the column to aggregate.

    Returns
    -------
    pandas.DataFrame
        Columns ``['city', 'Latitude', 'Longitude', column]`` with a fresh
        integer index, ordered by the grouping keys.
    """
    with_values = df.dropna(subset=[column])

    # Select the target column *before* aggregating: taking .max() over the
    # whole frame also aggregates every unrelated column, which is wasted work
    # and raises TypeError on recent pandas when an object column holds
    # values that cannot be compared with each other.
    # No pre-sort is needed: max() does not depend on row order and groupby
    # already orders the result by the grouping keys.
    return (
        with_values
        .groupby(['city', 'Latitude', 'Longitude'])[column]
        .max()
        .reset_index()
    )

def prepare_geo_df(df, column):
    """Aggregate ``df`` with ``prepare_df_agg`` and attach point geometry.

    The aggregated frame's 'Longitude' values are used as x and its 'Latitude'
    values as y when building the points. No CRS is assigned here.

    Returns a ``geopandas.GeoDataFrame`` holding the aggregated columns plus a
    ``geometry`` column.
    """
    aggregated = prepare_df_agg(df, column)
    point_geometry = gpd.points_from_xy(aggregated['Longitude'], aggregated['Latitude'])
    return gpd.GeoDataFrame(aggregated, geometry=point_geometry)

def plot_socio_df(geo_df, hex_color, socio_column, cblabel, titlelabel, ax=None,
                  output_dir="/analysis2"):
    """Draw one indicator as coloured points on top of the state base map.

    The input is first reduced to one row per location by ``prepare_geo_df``;
    the module-level ``states`` layer is drawn as the background and each
    location is plotted at (Longitude, Latitude), coloured by ``socio_column``
    with the 'Greens' colormap.

    Parameters
    ----------
    geo_df : pandas.DataFrame
        Raw table to plot. Despite the name it does not need to be a
        GeoDataFrame; it is converted internally. Needs the columns 'city',
        'Latitude', 'Longitude' and ``socio_column``.
    hex_color : str
        Fill colour for the state polygons.
    socio_column : str
        Column that drives the point colours. Also used in the output file name.
    cblabel : str
        Label for the colourbar.
    titlelabel : str
        Axes title.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When omitted, a new 10x10 inch figure is created,
        saved as ``{output_dir}/plot_{socio_column}.png``, shown and closed.
        When given, the plot is only drawn on ``ax``; nothing is saved or shown.
    output_dir : str, optional
        Directory for the PNG written in standalone mode. The directory is
        not created here.

    Notes
    -----
    Calls ``plt.ioff()``, which switches matplotlib's interactive mode off
    globally.
    """
    points = prepare_geo_df(geo_df, socio_column)
    plt.ioff()

    standalone = ax is None
    if standalone:
        # figsize is only meaningful when we own the figure; geopandas ignores
        # it whenever an Axes is supplied.
        fig, ax = plt.subplots(figsize=(10, 10))
    else:
        # Use the figure that owns `ax` rather than whatever pyplot considers
        # current, so the colourbar always lands next to the right panel.
        fig = ax.figure

    states.plot(ax=ax, color=hex_color, linewidth=1, aspect=1)

    with_values = points.loc[points[socio_column].notnull()]
    scatter = ax.scatter(
        with_values['Longitude'],
        with_values['Latitude'],
        s=0.5,
        c=with_values[socio_column],
        cmap='Greens',
        zorder=3,
    )
    fig.colorbar(scatter, ax=ax, label=cblabel, shrink=0.5)

    # Exact zeros are additionally drawn in white beneath the coloured layer
    # (zorder 1 vs 3).
    zeros = points.loc[points[socio_column] == 0]
    ax.scatter(zeros['Longitude'], zeros['Latitude'], s=0.5, c='White', zorder=1)

    # Fixed map window (presumably the contiguous United States — TODO confirm).
    ax.set_ylim([23, 49.38])
    ax.set_xlim([-124.77, -66.95])

    ax.set_xlabel("Longitude", size=12)
    ax.set_ylabel("Latitude", size=12)
    ax.set_title(titlelabel)

    if standalone:
        fig.savefig(f"{output_dir}/plot_{socio_column}.png")
        plt.show()
        # Close instead of clearing: a cleared figure stays registered with
        # pyplot, so repeated calls would accumulate open figures.
        plt.close(fig)


In [6]:
# Indicators to map, one standalone figure each: (column in pfas_stats, colourbar label).
# Every title follows the pattern "The <label> of Major Cities in the United States".
# N_D_LESSHS_PER used to be plotted twice in this cell; the repeat only
# regenerated the same figure and overwrote the same PNG, so it is listed once.
socio_indicators = [
    ("N_D_LESSHS_PER", "National Less than High School Education Percentile"),
    ("N_D_INCOME_PER", "National Low Income Percentile"),
    ("N_D_MINOR_PER", "National Minority Percentile"),
    ("N_D_UNEMPLOYED_PER", "National Unemployment Rate Percentile"),
    ("N_D_UNDER5_PER", "National Under Age 5 Percentile"),
    ("N_D_OVER64_PER", "National Over Age 64 Percentile"),
]

for socio_column, cblabel in socio_indicators:
    plot_socio_df(pfas_stats, '#D3D3D3', socio_column, cblabel,
                  f"The {cblabel} of Major Cities in the United States")

Output hidden; open in https://colab.research.google.com to view.

In [None]:
import matplotlib.gridspec as gridspec

# Figure 2: the six indicator maps as panels 2a-2f on a single 3x2 grid.
fig = plt.figure(figsize=(20, 20))

grid = (3, 2)

# Axes are created in row-major order: (0,0), (0,1), (1,0), (1,1), (2,0), (2,1).
ax_hs, ax_inc, ax_min, ax_un, ax_u5, ax_o64 = (
    plt.subplot2grid(grid, (row, col))
    for row in range(grid[0])
    for col in range(grid[1])
)

# (axes, panel tag, column in pfas_stats, colourbar label).
# Every title follows the pattern "The <label> of Major Cities in the United States".
figure2_panels = [
    (ax_hs, '2a', "N_D_LESSHS_PER", "National Less than High School Education Percentile"),
    (ax_inc, '2b', "N_D_INCOME_PER", "National Low Income Percentile"),
    (ax_min, '2c', "N_D_MINOR_PER", "National Minority Percentile"),
    (ax_un, '2d', "N_D_UNEMPLOYED_PER", "National Unemployment Rate Percentile"),
    (ax_u5, '2e', 'N_D_UNDER5_PER', "National Under Age 5 Percentile"),
    (ax_o64, '2f', 'N_D_OVER64_PER', "National Over Age 64 Percentile"),
]

for panel_ax, panel_tag, socio_column, cblabel in figure2_panels:
    plot_socio_df(pfas_stats, '#D3D3D3', socio_column, cblabel,
                  f"The {cblabel} of Major Cities in the United States", ax=panel_ax)
    # The tag is placed in data coordinates, just outside the upper-left corner
    # of the axis limits that plot_socio_df sets.
    panel_ax.text(-128, 52, panel_tag, fontsize=20, fontweight='bold')

fig.tight_layout()

file_path = "/content/drive/MyDrive/pfas_python_files/no_population_analysis2/Figure2.png"
fig.savefig(file_path)

# Last expression of the cell: renders the figure as the cell output.
fig