In [1]:
%load_ext autoreload
%autoreload 2
import sys

# Instead of packaging the code with setup.py or building a Docker/Singularity image,
# add the parent directory to sys.path so the sibling `src` package can be imported here.
# This keeps logic out of the notebook, so it only holds visualizations and the code is easier to test.
# It also keeps variable state clean, so results do not depend on cells being run out of order.
sys.path.append("..")

In [2]:
import os

import cartopy.crs as crs
import cartopy.feature as cfeature
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from src import most_recent_mesonet_data
from src import most_recent_mesonet_time
from src import landtype_describe
from src.plotting_scripts import landtype

In [3]:
def format_df(df):
    """
    Assigns a color to each row based on its landtype category and computes the
    share of the most frequent landtype.

    Args:
    df (pd.DataFrame): DataFrame with a "firstmode_val" column containing integer landtype categories and "firstmode_count"
    and "sum_total" columns containing counts of the landtype categories and total number of landtypes, respectively.

    Returns:
    pd.DataFrame: A copy of the input DataFrame with a "color" column containing color labels corresponding to the landtype categories,
    and a "divide" column with percentages of the "firstmode_count" column relative to the "sum_total" column.
    The input DataFrame itself is left unmodified.

    """

    # (landtype code, plotting color, description) -- one row per category
    landtypes = [
        (0, "black", "No Data"),
        (11, "blue", "Open Water"),
        (12, "white", "Perennial Ice/Snow"),
        (21, "coral", "Developed, Open Space"),
        (22, "pink", "Developed, Low Intensity"),
        (23, "red", "Developed, Medium Intensity"),
        (24, "magenta", "Developed High Intensity"),
        (31, "gray", "Barren Land (Rock/Sand/Clay)"),
        (41, "lime", "Deciduous Forest"),
        (42, "forestgreen", "Evergreen Forest"),
        (43, "green", "Mixed Forest"),
        (45, "olive", "Forest/Shrub"),
        (51, "brown", "Dwarf Scrub"),
        (52, "slategray", "Shrub/Scrub"),
        (71, "darkorchid", "Grassland/Herbaceous"),
        (72, "plum", "Sedge/Herbaceous"),
        (73, "indigo", "Lichens"),
        (74, "purple", "Moss"),
        (81, "yellow", "Pasture/Hay"),
        (82, "gold", "Cultivated Crops"),
        (90, "orange", "Woody Wetlands"),
        (95, "cyan", "Emergent Herbaceous Wetlands"),
    ]
    colordict = {code: color for code, color, _ in landtypes}

    # work on a copy so the caller's DataFrame is not modified as a side effect
    out = df.copy()

    # map the colors based on landtype categories (codes not in the table become NaN)
    out["color"] = out["firstmode_val"].map(colordict)

    # calculate the percentage of the firstmode count for each landtype category
    out["divide"] = (out["firstmode_count"] / out["sum_total"]) * 100

    return out


def plurality_plot(df):
    """
    Plot every site on a map, colored by landtype and sized by the "divide" value.

    Args:
    df (pd.DataFrame): DataFrame with "lat" and "lon" columns (used for the map extent and
    marker positions), a "color" column of matplotlib color names, and a "divide" column
    that is used as the marker size.

    Returns:
    None. The map is drawn on a newly created matplotlib figure.
    """
    # (plotting color, description) for the legend, in landtype-code order
    landtypes = [
        ("black", "No Data"),
        ("blue", "Open Water"),
        ("white", "Perennial Ice/Snow"),
        ("coral", "Developed, Open Space"),
        ("pink", "Developed, Low Intensity"),
        ("red", "Developed, Medium Intensity"),
        ("magenta", "Developed High Intensity"),
        ("gray", "Barren Land (Rock/Sand/Clay)"),
        ("lime", "Deciduous Forest"),
        ("forestgreen", "Evergreen Forest"),
        ("green", "Mixed Forest"),
        ("olive", "Forest/Shrub"),
        ("brown", "Dwarf Scrub"),
        ("slategray", "Shrub/Scrub"),
        ("darkorchid", "Grassland/Herbaceous"),
        ("plum", "Sedge/Herbaceous"),
        ("indigo", "Lichens"),
        ("purple", "Moss"),
        ("yellow", "Pasture/Hay"),
        ("gold", "Cultivated Crops"),
        ("orange", "Woody Wetlands"),
        ("cyan", "Emergent Herbaceous Wetlands"),
    ]

    # legend
    patches = [mpatches.Patch(color=color, label=label) for color, label in landtypes]

    # map extent: bounding box of the sites padded by one degree on every side
    projPC = crs.PlateCarree()
    latN = df["lat"].max() + 1
    latS = df["lat"].min() - 1
    lonW = df["lon"].min() - 1
    lonE = df["lon"].max() + 1

    _, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": projPC})
    ax.set_extent([lonW, lonE, latS, latN], crs=projPC)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle="--")
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)
    gl = ax.gridlines(
        crs=projPC,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label visibility is a property of the Gridliner, not of the Axes; setting
    # these flags on `ax` (as the code did before) has no effect.
    gl.top_labels = False
    gl.right_labels = False
    ax.scatter(
        x=df["lon"],
        y=df["lat"],
        c=df["color"],
        s=df["divide"],
        marker="o",
        transform=projPC,
    )
    ax.set_title("Mesonet Site Plurality by Landtype", size=16)
    ax.set_xlabel("Longitude", size=14)
    ax.set_ylabel("Latitude", size=14)
    ax.tick_params(axis="x", labelsize=12)
    ax.tick_params(axis="y", labelsize=12)
    ax.grid()
    ax.legend(
        bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0, handles=patches
    )


def percent_plot(df):
    """
    Scatter the "divide" value of every station, colored by landtype, and label each point.

    Args:
    df (pd.DataFrame): DataFrame with a "station" column (x position and point label),
    a "divide" column (y position) and a "color" column of matplotlib color names.

    Returns:
    None. The plot is drawn on a newly created matplotlib figure.
    """
    # (plotting color, description) for the legend, in landtype-code order
    legend_entries = [
        ("black", "No Data"),
        ("blue", "Open Water"),
        ("white", "Perennial Ice/Snow"),
        ("coral", "Developed, Open Space"),
        ("pink", "Developed, Low Intensity"),
        ("red", "Developed, Medium Intensity"),
        ("magenta", "Developed High Intensity"),
        ("gray", "Barren Land (Rock/Sand/Clay)"),
        ("lime", "Deciduous Forest"),
        ("forestgreen", "Evergreen Forest"),
        ("green", "Mixed Forest"),
        ("olive", "Forest/Shrub"),
        ("brown", "Dwarf Scrub"),
        ("slategray", "Shrub/Scrub"),
        ("darkorchid", "Grassland/Herbaceous"),
        ("plum", "Sedge/Herbaceous"),
        ("indigo", "Lichens"),
        ("purple", "Moss"),
        ("yellow", "Pasture/Hay"),
        ("gold", "Cultivated Crops"),
        ("orange", "Woody Wetlands"),
        ("cyan", "Emergent Herbaceous Wetlands"),
    ]
    handles = [
        mpatches.Patch(color=shade, label=text) for shade, text in legend_entries
    ]

    fig, ax = plt.subplots(figsize=(20, 10))
    ax.scatter(df["station"], df["divide"], c=df["color"], s=50)

    # write the station id next to its marker
    for _, row in df.iterrows():
        ax.annotate(row["station"], (row["station"], row["divide"]), fontsize=15)

    ax.grid()
    ax.set_ylabel("Percent of Total", size=20)
    # the x axis carries station names, which are already annotated on the points
    ax.tick_params(labelbottom=False, bottom=False)
    ax.legend(
        handles=handles, bbox_to_anchor=(1.05, 1), loc="center left", borderaxespad=0
    )

In [4]:
def format_df(df):
    """
    Expand a (VALUE, COUNT) table into one row per counted occurrence.

    Args:
    df (pd.DataFrame): DataFrame with a "COUNT" column (number of occurrences,
    truncated to an integer) and a "VALUE" column (the value to repeat).

    Returns:
    pd.DataFrame: A new DataFrame with a single "VALUE" column in which each input
    value appears COUNT times, in the input row order.
    """
    # Rows are taken positionally, so the result does not depend on the index labels
    # of `df` (looking rows up with `.iloc[label]` fails for any non-default index).
    counts = df["COUNT"].astype(int).to_numpy()
    # a non-positive count contributes no rows
    counts = np.clip(counts, 0, None)
    values = df["VALUE"].to_numpy()
    return pd.DataFrame({"VALUE": np.repeat(values, counts)})

In [5]:
# Locations of the mesonet inputs used below.
# NOTE(review): these are absolute paths on the author's machine; adjust them before running elsewhere.
ny_mesonet_data_path = "/home/aevans/nysm/archive/nysm/netcdf/proc"
ok_mesonet_data_path = "/home/aevans/landtype/geoinfo.csv"

In [6]:
# Create a dataframe of New York mesonet data from the files under ny_mesonet_data_path,
# then pass it through most_recent_time
# (presumably this keeps only the latest observation per station -- confirm in src).
ny_df = most_recent_mesonet_data.current_time_mesonet_df(ny_mesonet_data_path)
ny_df = most_recent_mesonet_time.most_recent_time(ny_df, ny_mesonet_data_path)
ny_df

Unnamed: 0,index,station,time_5M,lat,lon,elev,tair,ta9m,tslo,relh,...,ts05,ts25,ts50,sm05,sm25,sm50,frozen05,frozen25,frozen50,snow_depth
225,225,ADDI,2024-02-22 18:45:00,42.040359,-77.237259,507.614014,1.455700,1.509156,1.630567,84.838997,...,0.60,1.400000,2.20,0.553,0.459,0.432,0.0,0.0,0.0,0.035197
513,513,ANDE,2024-02-22 18:45:00,42.182270,-74.801392,518.281982,2.839763,2.351061,2.928556,67.068771,...,0.40,1.400000,2.30,0.237,0.118,0.099,0.0,0.0,0.0,0.003247
801,801,BATA,2024-02-22 18:45:00,43.019939,-78.135658,276.119995,6.679603,6.692170,6.730272,92.904671,...,0.40,1.400000,2.50,0.245,0.241,0.275,0.0,0.0,0.0,0.002970
1089,1089,BEAC,2024-02-22 18:45:00,41.528751,-73.945267,90.159798,5.990866,5.309453,6.024615,58.020329,...,1.10,2.200000,2.80,0.398,0.267,0.212,0.0,0.0,0.0,0.034284
1377,1377,BELD,2024-02-22 18:45:00,42.223221,-75.668518,470.369995,2.923032,2.662028,2.882353,72.596008,...,0.70,1.400000,2.20,0.356,0.406,0.408,0.0,0.0,0.0,0.040384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35073,35073,WFMB,2024-02-22 18:45:00,44.393234,-73.858826,614.598999,4.256781,4.149443,4.501074,59.867142,...,0.60,4.099999,2.00,0.245,0.224,0.246,0.0,0.0,0.0,0.204371
35361,35361,WGAT,2024-02-22 18:45:00,43.532410,-75.158600,442.966003,0.093417,-0.025258,0.192366,94.774933,...,0.22,0.640000,1.24,0.150,0.264,0.085,0.0,0.0,0.0,0.305742
35649,35649,WHIT,2024-02-22 18:45:00,43.485073,-73.423073,36.563801,4.524821,4.203181,4.380159,61.980888,...,0.00,0.980000,2.20,0.285,0.475,0.490,0.0,0.0,0.0,0.023897
35937,35937,WOLC,2024-02-22 18:45:00,43.228680,-76.842613,121.219002,3.044803,2.998535,3.096290,95.845078,...,1.10,1.300000,1.70,0.247,0.103,0.105,0.0,0.0,0.0,0.031506


In [7]:
# Station coordinates as plain lists, in the same row order as ny_df
ny_df_lons = ny_df["lon"].to_list()
ny_df_lats = ny_df["lat"].to_list()

In [9]:
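# NOTE: the Oklahoma cells further down use `ok_df`; uncomment these lines before running them.
# # create a dataframe of Oklahoma mesonet data
# ok_df = pd.read_csv(ok_mesonet_data_path)
# ok_df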

In [11]:
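# NOTE: the Oklahoma cells further down use `ok_df_lons` and `ok_df_lats`; uncomment these lines before running them.
# ok_df_lons = ok_df["elon"].to_list()
# ok_df_lats = ok_df["nlat"].to_list()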

Develop a way to add modes

In [14]:
csv_path = "/home/aevans/nwp_bias/src/landtype/data/buffer_10_km"

In [15]:
# CSV files holding the landtype buffers found under csv_path
all_files = os.listdir(csv_path)
csv_files = [name for name in all_files if name.endswith(".csv")]
csv_files

['avg_lulc_ny_10.csv', 'avg_lulc_ok_10.csv']

In [16]:
# Station identifiers, in the same row order as ny_df (and as the coordinate lists)
station_list_ny = ny_df["station"].to_list()
station_list_ny

['ADDI',
 'ANDE',
 'BATA',
 'BEAC',
 'BELD',
 'BELL',
 'BELM',
 'BERK',
 'BING',
 'BKLN',
 'BRAN',
 'BREW',
 'BROC',
 'BRON',
 'BROO',
 'BSPA',
 'BUFF',
 'BURD',
 'BURT',
 'CAMD',
 'CAPE',
 'CHAZ',
 'CHES',
 'CINC',
 'CLAR',
 'CLIF',
 'CLYM',
 'COBL',
 'COHO',
 'COLD',
 'COPA',
 'COPE',
 'CROG',
 'CSQR',
 'DELE',
 'DEPO',
 'DOVE',
 'DUAN',
 'EAUR',
 'EDIN',
 'EDWA',
 'ELDR',
 'ELLE',
 'ELMI',
 'ESSX',
 'FAYE',
 'FRED',
 'GABR',
 'GFAL',
 'GFLD',
 'GROT',
 'GROV',
 'HAMM',
 'HARP',
 'HARR',
 'HART',
 'HERK',
 'HFAL',
 'ILAK',
 'JOHN',
 'JORD',
 'KIND',
 'LAUR',
 'LOUI',
 'MALO',
 'MANH',
 'MEDI',
 'MEDU',
 'MORR',
 'NBRA',
 'NEWC',
 'NHUD',
 'OLDF',
 'OLEA',
 'ONTA',
 'OPPE',
 'OSCE',
 'OSWE',
 'OTIS',
 'OWEG',
 'PENN',
 'PHIL',
 'PISE',
 'POTS',
 'QUEE',
 'RAND',
 'RAQU',
 'REDF',
 'REDH',
 'ROXB',
 'RUSH',
 'SARA',
 'SBRI',
 'SCHA',
 'SCHO',
 'SCHU',
 'SCIP',
 'SHER',
 'SOME',
 'SOUT',
 'SPRA',
 'SPRI',
 'STAT',
 'STEP',
 'STON',
 'SUFF',
 'TANN',
 'TICO',
 'TULL',
 'TUPP',
 'TYRO',
 

In [17]:
# One row per NY station: identifier plus its coordinates
nysm_coords = pd.DataFrame(
    {
        "station": station_list_ny,
        "latitude": ny_df_lats,
        "longitude": ny_df_lons,
    }
)
nysm_coords

Unnamed: 0,station,latitude,longitude
0,ADDI,42.040359,-77.237259
1,ANDE,42.182270,-74.801392
2,BATA,43.019939,-78.135658
3,BEAC,41.528751,-73.945267
4,BELD,42.223221,-75.668518
...,...,...,...
121,WFMB,44.393234,-73.858826
122,WGAT,43.532410,-75.158600
123,WHIT,43.485073,-73.423073
124,WOLC,43.228680,-76.842613


In [18]:
# paths to data
path_5km = "/home/aevans/landtype/elevation/data/CSVs_slope_ny_5km/"

In [19]:
# x = 0
# for i in range(1,127):
#     df = pd.read_csv(f'{path_5km}/aspect_csv_{i}.csv')
#     df = format_df(df)
#     df.to_csv(f'{path_5km}/{station_list_ny[x]}_aspect.csv')
#     x+= 1

In [20]:
# Build one row per site holding the percentage of each aspect class (VALUE),
# label the rows with their station id, and save the table.
site_frames = []
for i in range(1, 127):  # the aspect files are numbered 1..126
    df_x = pd.read_csv(os.path.join(path_5km, f"aspect_csv_{i}.csv"))
    df_x["Percentage"] = df_x["COUNT"] / df_x["COUNT"].sum() * 100
    df_x["site"] = i
    # wide format: a single row (this site), one column per VALUE
    df_x = df_x.pivot(index="site", columns="VALUE", values="Percentage")
    site_frames.append(df_x)
# concatenate once; growing a DataFrame inside the loop copies it on every pass
df_complete = pd.concat(site_frames)
df_complete["station"] = station_list_ny
# a class that is absent for a site means 0 percent
df_complete = df_complete.fillna(0)
df_complete.to_csv("/home/aevans/correlation/aspect_5km.csv")

FileNotFoundError: [Errno 2] No such file or directory: '/home/aevans/landtype/elevation/data/CSVs_slope_ny_5km//aspect_csv_1.csv'

In [None]:
# nysm_coords.to_csv('/home/aevans/landtype/notebooks/nysm_coords.csv')

In [None]:
# Seed values for position 0 of each list; a later cell appends the entries for sites 2-126.
# NOTE(review): presumably these numbers were read from 1_csv.csv -- confirm.
# Re-run this cell before re-running the append loop, otherwise the lists keep growing.
# first mode
count1_list = [102510]
class_name1_list = ["Deciduous Forest"]
value1_list = [41]

# second mode
count2_list = [89978]
class_name2_list = ["Mixed Forest"]
value2_list = [43]

# third mode
count3_list = [89646]
class_name3_list = ["Pasture/Hay"]
value3_list = [81]

# sums
sums_list = [349019]

In [None]:
# Landtype code, plotting color and description, written row by row so the three
# parallel sequences cannot drift out of alignment; zip() regroups them by column.
legend, colors, leg_str = map(
    list,
    zip(
        (0, "black", "No Data"),
        (11, "blue", "Open Water"),
        (12, "white", "Perennial Ice/Snow"),
        (21, "coral", "Developed, Open Space"),
        (22, "pink", "Developed, Low Intensity"),
        (23, "red", "Developed, Medium Intensity"),
        (24, "magenta", "Developed High Intensity"),
        (31, "gray", "Barren Land (Rock/Sand/Clay)"),
        (41, "lime", "Deciduous Forest"),
        (42, "forestgreen", "Evergreen Forest"),
        (43, "green", "Mixed Forest"),
        (45, "olive", "Forest/Shrub"),
        (51, "brown", "Dwarf Scrub"),
        (52, "slategray", "Shrub/Scrub"),
        (71, "darkorchid", "Grassland/Herbaceous"),
        (72, "plum", "Sedge/Herbaceous"),
        (73, "indigo", "Lichens"),
        (74, "purple", "Moss"),
        (81, "yellow", "Pasture/Hay"),
        (82, "gold", "Cultivated Crops"),
        (90, "orange", "Woody Wetlands"),
        (95, "cyan", "Emergent Herbaceous Wetlands"),
    ),
)
legend = np.array(legend)

In [None]:
len(legend)

In [None]:
# Folder with the per-site landtype buffer extracts for the NYSM
csv_path_x = "/home/aevans/landtype/data/buffer_10_km/extract_csv_nysm/"
all_files_x = os.listdir(csv_path_x)
# keep only the csv files
csv_files_x = [name for name in all_files_x if name.endswith(".csv")]
csv_files_x

In [None]:
# Per-site landtype percentages: one row per site, one column per landtype Value.
site_frames = []
for i in range(1, 127):  # the extract files are numbered 1..126
    df_x = pd.read_csv(
        f"/home/aevans/landtype/data/buffer_10_km/extract_csv_nysm/{i}_csv.csv"
    )
    df_x["Percentage"] = df_x["Count"] / df_x["Count"].sum() * 100
    df_x["site"] = i
    # wide format: a single row (this site), one column per Value
    df_x = df_x.pivot(index="site", columns="Value", values="Percentage")
    site_frames.append(df_x)
# concatenate once; growing a DataFrame inside the loop copies it on every pass
df_complete = pd.concat(site_frames)

In [None]:
# A landtype that does not occur for a site is stored as 0 percent;
# the station ids are then attached in row order.
df_complete = df_complete.fillna(0)
df_complete["station"] = station_list_ny

In [None]:
# df_complete.to_csv('/home/aevans/landtype/data/buffer_10_percent.csv')

In [None]:
# For sites 2-126, record the three landtype classes with the highest Count and the
# total Count, appending to the lists seeded earlier for position 0.
ranked_lists = (
    (count1_list, class_name1_list, value1_list),  # first mode
    (count2_list, class_name2_list, value2_list),  # second mode
    (count3_list, class_name3_list, value3_list),  # third mode
)
for i in range(2, 127):
    df = pd.read_csv(
        f"/home/aevans/landtype/data/buffer_10_km/extract_csv_nysm/{i}_csv.csv"
    )
    # rows ordered from most to least frequent class
    the_mode = df.sort_values(by=["Count"], ascending=False)
    for rank, (counts, class_names, values) in enumerate(ranked_lists):
        mode_row = the_mode.iloc[rank]
        count = mode_row["Count"].tolist()
        class_name = mode_row["ClassName"]
        value = mode_row["Value"].tolist()
        counts.append(count)
        class_names.append(class_name)
        values.append(value)
    # sum
    sums = sum(the_mode["Count"])
    sums_list.append(sums)

In [None]:
# Assemble the NY summary table: per station, the three top landtype modes
# (count, class name, value) and the total count.
avg_lulc_ny_10_df = pd.DataFrame(
    {
        "station": station_list_ny,
        # first mode
        "firstmode_count": count1_list,
        "firstmode_class": class_name1_list,
        "firstmode_val": value1_list,
        # second mode
        "secondmode_count": count2_list,
        "secondmode_class": class_name2_list,
        "secondmode_val": value2_list,
        # third mode
        "thirdmode_count": count3_list,
        "thirdmode_class": class_name3_list,
        "thirdmode_val": value3_list,
        "sum_total": sums_list,
    }
)
# avg_lulc_ny_10_df.to_csv('/home/aevans/landtype/data/buffer_10_km/avg_lulc_ny_10.csv')

# Oklahoma

In [None]:
csv_path = "/home/aevans/landtype/data/buffer_10_km/extract_csv_oksm/"

In [None]:
# CSV files holding the landtype buffers found under csv_path (the Oklahoma extracts)
all_files = os.listdir(csv_path)
csv_files = [name for name in all_files if name.endswith(".csv")]
csv_files

In [None]:
# Oklahoma station identifiers.
# NOTE: `ok_df` is only created in a cell near the top whose code is commented out;
# that cell must be restored and run first, otherwise this raises NameError.
station_list_ok = ok_df["stid"].to_list()
len(station_list_ok)

In [None]:
# One row per Oklahoma station: identifier plus its coordinates
oksm_coords = pd.DataFrame(
    {
        "station": station_list_ok,
        "latitude": ok_df_lats,
        "longitude": ok_df_lons,
    }
)
oksm_coords

In [None]:
oksm_coords.to_csv("/home/aevans/landtype/notebooks/oksm_coords.csv")

In [None]:
# Seed values for position 0 of each Oklahoma list; a later cell appends sites 2-144.
# NOTE(review): presumably these numbers were read from 1_csv.csv -- confirm.
# Re-run this cell before re-running the append loop, otherwise the lists keep growing.
# first mode
count1_list_ok = [244513]
class_name1_list_ok = ["Grassland/Herbaceous"]
value1_list_ok = [71]

# second mode
count2_list_ok = [58444]
class_name2_list_ok = ["Deciduous Forest"]
value2_list_ok = [41]

# third mode
count3_list_ok = [20559]
class_name3_list_ok = ["Cultivated Crops"]
value3_list_ok = [82]

# sums
sums_list_ok = [349024]

In [None]:
# Per-site landtype percentages for Oklahoma: one row per site, one column per landtype Value.
site_frames_ok = []
for i in range(1, 145):  # the extract files are numbered 1..144
    df_y = pd.read_csv(
        f"/home/aevans/landtype/data/buffer_10_km/extract_csv_oksm/{i}_csv.csv"
    )
    df_y["Percentage"] = df_y["Count"] / df_y["Count"].sum() * 100
    df_y["site"] = i
    # wide format: a single row (this site), one column per Value
    df_y = df_y.pivot(index="site", columns="Value", values="Percentage")
    site_frames_ok.append(df_y)
# concatenate once; growing a DataFrame inside the loop copies it on every pass
df_complete_ok = pd.concat(site_frames_ok)

In [None]:
# A landtype that does not occur for a site is stored as 0 percent;
# the station ids are then attached in row order.
df_complete_ok = df_complete_ok.fillna(0)
df_complete_ok["station"] = station_list_ok

In [None]:
# df_complete_ok.to_csv('/home/aevans/landtype/data/OKbuffer_10_percent.csv')

In [None]:
# For Oklahoma sites 2-144, record the three landtype classes with the highest Count
# and the total Count, appending to the lists seeded earlier for position 0.
ranked_lists_ok = (
    (count1_list_ok, class_name1_list_ok, value1_list_ok),  # first mode
    (count2_list_ok, class_name2_list_ok, value2_list_ok),  # second mode
    (count3_list_ok, class_name3_list_ok, value3_list_ok),  # third mode
)
for i in range(2, 145):
    df = pd.read_csv(
        f"/home/aevans/landtype/data/buffer_10_km/extract_csv_oksm/{i}_csv.csv"
    )
    # rows ordered from most to least frequent class
    the_mode = df.sort_values(by=["Count"], ascending=False)
    for rank, (counts, class_names, values) in enumerate(ranked_lists_ok):
        mode_row = the_mode.iloc[rank]
        count = mode_row["Count"].tolist()
        class_name = mode_row["ClassName"]
        value = mode_row["Value"].tolist()
        counts.append(count)
        class_names.append(class_name)
        values.append(value)
    # sum
    sums = sum(the_mode["Count"])
    sums_list_ok.append(sums)

In [None]:
# Assemble the Oklahoma summary table: per station, the three top landtype modes
# (count, class name, value) and the total count.
avg_lulc_ok_10_df = pd.DataFrame(
    {
        "station": station_list_ok,
        # first mode
        "firstmode_count": count1_list_ok,
        "firstmode_class": class_name1_list_ok,
        "firstmode_val": value1_list_ok,
        # second mode
        "secondmode_count": count2_list_ok,
        "secondmode_class": class_name2_list_ok,
        "secondmode_val": value2_list_ok,
        # third mode
        "thirdmode_count": count3_list_ok,
        "thirdmode_class": class_name3_list_ok,
        "thirdmode_val": value3_list_ok,
        "sum_total": sums_list_ok,
    }
)
# avg_lulc_ok_10_df.to_csv('/home/aevans/landtype/data/buffer_10_km/avg_lulc_ok_10.csv')

In [None]:
# Reload the saved NY and Oklahoma summaries from disk, replacing any in-memory versions.
# The to_csv calls that would write these files are commented out in the cells above,
# so the files must already exist.
avg_lulc_ny_10_df = pd.read_csv(
    "/home/aevans/landtype/data/buffer_10_km/avg_lulc_ny_10.csv"
)
avg_lulc_ok_10_df = pd.read_csv(
    "/home/aevans/landtype/data/buffer_10_km/avg_lulc_ok_10.csv"
)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import cartopy.crs as crs
import cartopy.feature as cfeature
import numpy as np
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches
from matplotlib import colors

# Legend label and plotting color for every landtype class, written row by row so the
# two parallel lists cannot drift out of alignment; zip() regroups them by column.
LEG_STR, COLORS = map(
    list,
    zip(
        ("No Data", "black"),
        ("Open Water", "blue"),
        ("Perennial Ice/Snow", "white"),
        ("Developed, Open Space", "coral"),
        ("Developed, Low Intensity", "pink"),
        ("Developed, Medium Intensity", "red"),
        ("Developed High Intensity", "magenta"),
        ("Barren Land (Rock/Sand/Clay)", "gray"),
        ("Deciduous Forest", "lime"),
        ("Evergreen Forest", "forestgreen"),
        ("Mixed Forest", "green"),
        ("Forest/Shrub", "olive"),
        ("Dwarf Scrub", "brown"),
        ("Shrub/Scrub", "slategray"),
        ("Grassland/Herbaceous", "darkorchid"),
        ("Sedge/Herbaceous", "plum"),
        ("Lichens", "indigo"),
        ("Moss", "purple"),
        ("Pasture/Hay", "yellow"),
        ("Cultivated Crops", "gold"),
        ("Woody Wetlands", "orange"),
        ("Emergent Herbaceous Wetlands", "cyan"),
    ),
)


def create_cmap() -> ListedColormap:
    """
    this creates the landtype colormap

    Returns:
        cmap (ListedColormap): colormap whose entries are COLORS, in legend order
    """
    # ListedColormap takes a sequence of colors. The previous code passed a dict keyed
    # by landtype code; iterating a dict yields its integer keys, not the color names.
    return ListedColormap(COLORS)


def landtype(df: pd.DataFrame) -> None:
    """
    this plots the landtype for a specified region determined by the imported dataframe

    Args:
        df (pd.DataFrame): must provide "lat" and "lon" columns (used for the map
            extent and the marker positions) and a "color" column of matplotlib colors
    """
    projPC = crs.PlateCarree()
    latN = df["lat"].max()
    latS = df["lat"].min()
    lonW = df["lon"].min()
    lonE = df["lon"].max()

    _, ax = plt.subplots(figsize=(12, 9), subplot_kw={"projection": projPC})
    ax.set_extent([lonW, lonE, latS, latN], crs=projPC)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle="--")
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.STATES)
    gl = ax.gridlines(
        crs=projPC,
        draw_labels=True,
        linewidth=2,
        color="black",
        alpha=0.5,
        linestyle="--",
    )
    # Label visibility is a property of the Gridliner, not of the Axes; setting
    # these flags on `ax` (as the code did before) has no effect.
    gl.top_labels = False
    gl.right_labels = False

    # "color" already holds explicit color names, so no colormap is passed:
    # matplotlib ignores `cmap` when `c` is a sequence of colors.
    ax.scatter(
        df["lon"],
        df["lat"],
        c=df["color"],
        transform=projPC,
        zorder=5,
    )

    # legend: one patch per landtype class. It is built once, after plotting;
    # calling ax.legend() before any artist exists only produces a warning.
    patches = [
        mpatches.Patch(color=color, label=label)
        for color, label in zip(COLORS, LEG_STR)
    ]
    ax.legend(
        bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0, handles=patches
    )

In [None]:
# Landtype code, plotting color and description, written row by row so the three
# parallel sequences cannot drift out of alignment; zip() regroups them by column.
legend, colors, leg_str = map(
    list,
    zip(
        (0, "black", "No Data"),
        (11, "blue", "Open Water"),
        (12, "white", "Perennial Ice/Snow"),
        (21, "coral", "Developed, Open Space"),
        (22, "pink", "Developed, Low Intensity"),
        (23, "red", "Developed, Medium Intensity"),
        (24, "magenta", "Developed High Intensity"),
        (31, "gray", "Barren Land (Rock/Sand/Clay)"),
        (41, "lime", "Deciduous Forest"),
        (42, "forestgreen", "Evergreen Forest"),
        (43, "green", "Mixed Forest"),
        (45, "olive", "Forest/Shrub"),
        (51, "brown", "Dwarf Scrub"),
        (52, "slategray", "Shrub/Scrub"),
        (71, "darkorchid", "Grassland/Herbaceous"),
        (72, "plum", "Sedge/Herbaceous"),
        (73, "indigo", "Lichens"),
        (74, "purple", "Moss"),
        (81, "yellow", "Pasture/Hay"),
        (82, "gold", "Cultivated Crops"),
        (90, "orange", "Woody Wetlands"),
        (95, "cyan", "Emergent Herbaceous Wetlands"),
    ),
)
legend = np.array(legend)

# lookup tables keyed by landtype code
descripdict = dict(zip(legend, leg_str))
colordict = dict(zip(legend, colors))


# tag every NY station with the color of its firstmode_val and with its coordinates
avg_lulc_ny_10_df["color"] = avg_lulc_ny_10_df["firstmode_val"].map(colordict)
avg_lulc_ny_10_df["lon"] = ny_df_lons
avg_lulc_ny_10_df["lat"] = ny_df_lats

In [None]:
landtype(avg_lulc_ny_10_df)

In [None]:
# Landtype code, plotting color and description, written row by row so the three
# parallel sequences cannot drift out of alignment; zip() regroups them by column.
legend, colors, leg_str = map(
    list,
    zip(
        (0, "black", "No Data"),
        (11, "blue", "Open Water"),
        (12, "white", "Perennial Ice/Snow"),
        (21, "coral", "Developed, Open Space"),
        (22, "pink", "Developed, Low Intensity"),
        (23, "red", "Developed, Medium Intensity"),
        (24, "magenta", "Developed High Intensity"),
        (31, "gray", "Barren Land (Rock/Sand/Clay)"),
        (41, "lime", "Deciduous Forest"),
        (42, "forestgreen", "Evergreen Forest"),
        (43, "green", "Mixed Forest"),
        (45, "olive", "Forest/Shrub"),
        (51, "brown", "Dwarf Scrub"),
        (52, "slategray", "Shrub/Scrub"),
        (71, "darkorchid", "Grassland/Herbaceous"),
        (72, "plum", "Sedge/Herbaceous"),
        (73, "indigo", "Lichens"),
        (74, "purple", "Moss"),
        (81, "yellow", "Pasture/Hay"),
        (82, "gold", "Cultivated Crops"),
        (90, "orange", "Woody Wetlands"),
        (95, "cyan", "Emergent Herbaceous Wetlands"),
    ),
)
legend = np.array(legend)

# lookup tables keyed by landtype code
descripdict = dict(zip(legend, leg_str))
colordict = dict(zip(legend, colors))


# tag every Oklahoma station with the color of its firstmode_val and with its coordinates
avg_lulc_ok_10_df["color"] = avg_lulc_ok_10_df["firstmode_val"].map(colordict)
avg_lulc_ok_10_df["lon"] = ok_df_lons
avg_lulc_ok_10_df["lat"] = ok_df_lats

In [None]:
landtype(avg_lulc_ok_10_df)

In [None]:
avg_lulc_ny_10_df

In [None]:
avg_lulc_ny_10_df["firstmode_class"].value_counts()

In [None]:
avg_lulc_ok_10_df["firstmode_class"].value_counts()

In [None]:
def format_df(df):
    """
    Add a landtype "color" column and a "divide" percentage column to ``df``.

    This definition shadows the ``format_df`` defined earlier in the notebook.

    Args:
    df (pd.DataFrame): DataFrame with a "firstmode_val" column of integer landtype codes and
    "firstmode_count", "secondmode_count" and "thirdmode_count" columns.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in. It is modified in place
    (callers that keep using the original variable see the new columns) and also returned,
    so the result can be assigned. New columns:
      - "color": plot color for the row's "firstmode_val" code (NaN for codes not in the table).
      - "divide": "firstmode_count" as a percentage of the sum of the three mode counts.
    """

    # (landtype code, plot color, description) for every class in the legend.
    landtype_table = [
        (0, "black", "No Data"),
        (11, "blue", "Open Water"),
        (12, "white", "Perennial Ice/Snow"),
        (21, "coral", "Developed, Open Space"),
        (22, "pink", "Developed, Low Intensity"),
        (23, "red", "Developed, Medium Intensity"),
        (24, "magenta", "Developed High Intensity"),
        (31, "gray", "Barren Land (Rock/Sand/Clay)"),
        (41, "lime", "Deciduous Forest"),
        (42, "forestgreen", "Evergreen Forest"),
        (43, "green", "Mixed Forest"),
        (45, "olive", "Forest/Shrub"),
        (51, "brown", "Dwarf Scrub"),
        (52, "slategray", "Shrub/Scrub"),
        (71, "darkorchid", "Grassland/Herbaceous"),
        (72, "plum", "Sedge/Herbaceous"),
        (73, "indigo", "Lichens"),
        (74, "purple", "Moss"),
        (81, "yellow", "Pasture/Hay"),
        (82, "gold", "Cultivated Crops"),
        (90, "orange", "Woody Wetlands"),
        (95, "cyan", "Emergent Herbaceous Wetlands"),
    ]
    colordict = {code: color for code, color, _description in landtype_table}

    df["color"] = df["firstmode_val"].map(colordict)

    # Share of the most common landtype among the three tracked modes, in percent.
    total = df["firstmode_count"] + df["secondmode_count"] + df["thirdmode_count"]
    df["divide"] = (df["firstmode_count"] / total) * 100

    # Return the mutated frame so `result = format_df(frame)` is not None.
    return df

In [None]:
# Landtype lookup tables for the NY frame.
# The three sequences are parallel: index i of `colors`, `legend` and `leg_str`
# all describe the same landtype class.
colors = [
    "black", "blue", "white", "coral", "pink", "red",
    "magenta", "gray", "lime", "forestgreen", "green", "olive",
    "brown", "slategray", "darkorchid", "plum", "indigo", "purple",
    "yellow", "gold", "orange", "cyan",
]
legend = np.array(
    [
        0, 11, 12,
        21, 22, 23, 24,
        31,
        41, 42, 43, 45,
        51, 52,
        71, 72, 73, 74,
        81, 82,
        90, 95,
    ]
)
leg_str = [
    "No Data",
    "Open Water", "Perennial Ice/Snow",
    "Developed, Open Space", "Developed, Low Intensity",
    "Developed, Medium Intensity", "Developed High Intensity",
    "Barren Land (Rock/Sand/Clay)",
    "Deciduous Forest", "Evergreen Forest", "Mixed Forest", "Forest/Shrub",
    "Dwarf Scrub", "Shrub/Scrub",
    "Grassland/Herbaceous", "Sedge/Herbaceous", "Lichens", "Moss",
    "Pasture/Hay", "Cultivated Crops",
    "Woody Wetlands", "Emergent Herbaceous Wetlands",
]

descripdict = {}
colordict = {}

# Key both tables by the landtype code at the same position.
for x, color_name in enumerate(colors):
    descripdict[legend[x]] = leg_str[x]
    colordict[legend[x]] = color_name


# Color each NY row by its first-mode landtype code.
ny_first_codes = avg_lulc_ny_10_df["firstmode_val"]
avg_lulc_ny_10_df["color"] = ny_first_codes.map(colordict)

In [None]:
# Per-row sum of the first, second and third mode counts for the NY frame.
first_count = avg_lulc_ny_10_df["firstmode_count"]
second_count = avg_lulc_ny_10_df["secondmode_count"]
third_count = avg_lulc_ny_10_df["thirdmode_count"]
total = first_count + second_count + third_count

In [None]:
# "divide": first-mode count as a percentage of `total` (sum of the three mode counts).
first_share = avg_lulc_ny_10_df["firstmode_count"] / total
avg_lulc_ny_10_df["divide"] = first_share * 100

In [None]:
# One marker symbol per NY row: "o" when the first-mode share ("divide") is
# below 50 percent, "^" otherwise (NaN shares also get "^").
# Iterating the column directly avoids using iterrows() index labels as
# positions for .iloc, which is only correct for a default 0..n-1 index.
marklist = ["o" if pct < 50 else "^" for pct in avg_lulc_ny_10_df["divide"]]

In [None]:
# Store the marker symbols as a "mark" column (marklist must hold one entry per row),
# then display the frame.
avg_lulc_ny_10_df["mark"] = marklist
avg_lulc_ny_10_df

In [None]:
# Legend handles: one colored patch per entry of `colors`, labelled with the
# description at the same position in `leg_str`.
patches = [
    mpatches.Patch(color=colors[i], label=leg_str[i]) for i in range(len(colors))
]

In [None]:
# NY stations vs. first-mode share ("divide"), one dot per station colored by
# its first-mode landtype.
# Ideas noted for later:
#   - order the x axis by longitude
#   - plot by lon/lat instead, with the scatter size linked to "divide"
#   - use a different marker type above vs. below 50%

fig, ax = plt.subplots(figsize=(20, 10))

ny_station_names = avg_lulc_ny_10_df["station"]
ny_first_share = avg_lulc_ny_10_df["divide"]
ax.scatter(ny_station_names, ny_first_share, c=avg_lulc_ny_10_df["color"], s=50)

# Label every dot with its station name.
for station_name, share in zip(ny_station_names, ny_first_share):
    ax.annotate(station_name, (station_name, share), fontsize=15)

ax.grid()
ax.set_ylabel("Percent of Total", size=20)
# The station names are annotated on the dots, so the x tick labels are hidden.
ax.tick_params(labelbottom=False, bottom=False)
ax.legend(handles=patches, loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)

In [None]:
# NY stations on a lon/lat scatter: dot color = first-mode landtype color,
# dot size = first-mode share ("divide").
fig, ax = plt.subplots()
scatter_options = dict(x="lon", y="lat", c="color", s="divide", marker="o", figsize=(9, 6))
avg_lulc_ny_10_df.plot.scatter(ax=ax, **scatter_options)

ax.set_title("Mesonet Site Plurality by Landtype", size=16)
ax.set_xlabel("Longitude", size=14)
ax.set_ylabel("Latitude", size=14)
for axis_name in ("x", "y"):
    ax.tick_params(axis=axis_name, labelsize=12)
ax.grid()
ax.legend(handles=patches, loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)

In [None]:
# Run format_df on the Oklahoma frame, keep whatever it returns, and display it.
# Later plotting cells read the "color" and "divide" columns from avg_lulc_ok_10_df itself.
avg_lulc_ok_10_df_1 = format_df(avg_lulc_ok_10_df)
avg_lulc_ok_10_df_1

In [None]:
# Oklahoma stations on a lon/lat scatter: dot color = first-mode landtype
# color, dot size = first-mode share ("divide").
# The y axis is latitude and the x tick labels (longitudes) stay visible,
# matching the NY lon/lat scatter cell.
fig, ax = plt.subplots()
avg_lulc_ok_10_df.plot.scatter(
    x="lon", y="lat", c="color", s="divide", marker="o", figsize=(9, 6), ax=ax
)
ax.set_title("Mesonet Site Plurality by Landtype", size=16)
ax.set_xlabel("Longitude", size=14)
ax.set_ylabel("Latitude", size=14)
ax.tick_params(axis="x", labelsize=12)
ax.tick_params(axis="y", labelsize=12)
ax.grid()
ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0, handles=patches)

In [None]:
# Oklahoma stations vs. first-mode share ("divide"), one dot per station
# colored by its first-mode landtype.
fig, ax = plt.subplots(figsize=(20, 10))

ok_station_names = avg_lulc_ok_10_df["station"]
ok_first_share = avg_lulc_ok_10_df["divide"]
ax.scatter(ok_station_names, ok_first_share, c=avg_lulc_ok_10_df["color"], s=50)

# Label every dot with its station name.
for station_name, share in zip(ok_station_names, ok_first_share):
    ax.annotate(station_name, (station_name, share), fontsize=15)

ax.grid()
ax.set_ylabel("Percent of Total", size=20)
# The station names are annotated on the dots, so the x tick labels are hidden.
ax.tick_params(labelbottom=False, bottom=False)
ax.legend(handles=patches, loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)

In [None]:
# Call plurality_plot on the full NY frame.
# NOTE(review): plurality_plot is not defined in the nearby cells — confirm it is defined or imported earlier in the notebook.
plurality_plot(avg_lulc_ny_10_df)

In [None]:
# Call plurality_plot on the full Oklahoma frame.
# NOTE(review): plurality_plot is not defined in the nearby cells — confirm it is defined or imported earlier in the notebook.
plurality_plot(avg_lulc_ok_10_df)

In [None]:
# Call percent_plot on the full NY frame.
# NOTE(review): percent_plot is not defined in the nearby cells — confirm it is defined or imported earlier in the notebook.
percent_plot(avg_lulc_ny_10_df)

In [None]:
# Call percent_plot on the full Oklahoma frame.
# NOTE(review): percent_plot is not defined in the nearby cells — confirm it is defined or imported earlier in the notebook.
percent_plot(avg_lulc_ok_10_df)

In [None]:
# Keep only the NY stations whose first-mode landtype share ("divide") is
# above 50 percent, and collect their columns as plain lists.
# A boolean mask is used instead of iterrows() + .iloc[label]: iterrows() yields
# index labels, which only coincide with positions for a default 0..n-1 index.
ny_majority = avg_lulc_ny_10_df.loc[avg_lulc_ny_10_df["divide"] > 50]

statlist = ny_majority["station"].tolist()
lon_list = ny_majority["lon"].tolist()
lat_list = ny_majority["lat"].tolist()
c_list = ny_majority["color"].tolist()
div_list = ny_majority["divide"].tolist()

In [None]:
# Assemble the filtered NY lists into a frame, one column per list,
# in the order station, lon, lat, color, divide.
plur_df = pd.DataFrame()
ny_columns = (
    ("station", statlist),
    ("lon", lon_list),
    ("lat", lat_list),
    ("color", c_list),
    ("divide", div_list),
)
for column_name, column_values in ny_columns:
    plur_df[column_name] = column_values

In [None]:
# Call plurality_plot on the NY subset whose "divide" exceeds 50.
# NOTE(review): plurality_plot is not defined in the nearby cells — confirm it is defined or imported earlier in the notebook.
plurality_plot(plur_df)

In [None]:
# Keep only the Oklahoma stations whose first-mode landtype share ("divide")
# is above 50 percent, and collect their columns as plain lists.
# A boolean mask is used instead of iterrows() + .iloc[label]: iterrows() yields
# index labels, which only coincide with positions for a default 0..n-1 index.
ok_majority = avg_lulc_ok_10_df.loc[avg_lulc_ok_10_df["divide"] > 50]

statlist_ok = ok_majority["station"].tolist()
lon_list_ok = ok_majority["lon"].tolist()
lat_list_ok = ok_majority["lat"].tolist()
c_list_ok = ok_majority["color"].tolist()
div_list_ok = ok_majority["divide"].tolist()

In [None]:
# Assemble the filtered Oklahoma lists into a frame, one column per list,
# in the order station, lon, lat, color, divide.
plur_df_ok = pd.DataFrame()
ok_columns = (
    ("station", statlist_ok),
    ("lon", lon_list_ok),
    ("lat", lat_list_ok),
    ("color", c_list_ok),
    ("divide", div_list_ok),
)
for column_name, column_values in ok_columns:
    plur_df_ok[column_name] = column_values

In [None]:
# Call plurality_plot on the Oklahoma subset whose "divide" exceeds 50.
# NOTE(review): plurality_plot is not defined in the nearby cells — confirm it is defined or imported earlier in the notebook.
plurality_plot(plur_df_ok)