In [85]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [88]:
data_raw = pd.read_csv("raw_set.csv")
data_raw[['Longitude', 'Latitude']].isnull().sum()#0
# data_raw['Latitude'].isnull().sum()#0

Longitude    0
Latitude     0
dtype: int64

In [28]:
def find_correlated(df: pd.DataFrame, threshold=0.9):
    """
    Finds the number of highly correlated column pairs in a DataFrame.
    
    Parameters:
    - df: pd.DataFrame
        The input DataFrame to analyze.
    - threshold: float
        The correlation threshold above which columns are considered highly correlated.
        
    Returns:
    - int
        The number of column pairs with a correlation greater than the threshold.
    """
    # Compute the correlation matrix
    corr_matrix = df.corr()
    
    # Mask the upper triangle of the correlation matrix
    mask = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
    upper_triangle = corr_matrix.where(mask)
    
    # Find column pairs with correlation above the threshold
    correlated_pairs = [
        (col1, col2) 
        for col1 in upper_triangle.columns 
        for col2 in upper_triangle.index 
        if abs(upper_triangle.loc[col2, col1]) > threshold
    ]
    
    return len(correlated_pairs)

# Applying the function to the uploaded dataset
correlated_count = find_correlated(data_raw, threshold=0.8)
correlated_count


56436

In [27]:
import networkx as nx

def find_unique_correlated_groups(df: pd.DataFrame, threshold=0.9):
    """
    Finds the unique groups of highly correlated columns in a DataFrame.
    
    Parameters:
    - df: pd.DataFrame
        The input DataFrame to analyze.
    - threshold: float
        The correlation threshold above which columns are considered highly correlated.
        
    Returns:
    - int
        The number of unique groups of correlated columns.
    - list
        A list of unique correlated groups.
    """
    # Compute the correlation matrix
    corr_matrix = df.corr()
    
    # Identify pairs of columns with correlation above the threshold
    correlated_pairs = [
        (col1, col2) 
        for col1 in corr_matrix.columns 
        for col2 in corr_matrix.columns 
        if col1 != col2 and abs(corr_matrix.loc[col1, col2]) > threshold
    ]
    
    # Build a graph where nodes are columns and edges indicate high correlation
    graph = nx.Graph()
    graph.add_edges_from(correlated_pairs)
    
    # Find connected components in the graph (unique groups of correlated columns)
    correlated_groups = list(nx.connected_components(graph))
    
    return len(correlated_groups), correlated_groups
find_unique_correlated_groups(data_raw)

(87,
 [{'AirPollutant: Acetaldehyde_ug/m^3_EPA2017',
   'AirPollutant: Acetaldehyde_ug/m^3_EPA2018',
   'AirPollutant: Formaldehyde_ug/m^3_EPA2017',
   'AirPollutant: Formaldehyde_ug/m^3_EPA2018'},
  {'AirPollutant: Carbon tetrachloride_ug/m^3_EPA2017',
   'AirPollutant: Carbon tetrachloride_ug/m^3_EPA2018'},
  {'API_Single_Race_16yr_And_Over_Commute_To_Work_ACS2017_21',
   'API_Single_Race_16yr_And_Over_For_Work_Commute_ACS2017_21',
   'API_Single_Race_25yrs_and_over_ACS2016_20',
   'API_Single_Race_25yrs_and_over_Less_Than_high_school_degree_ACS2016_20',
   'API_Single_Race_25yrs_and_over_with_Business_Degree_ACS2016_20',
   'API_Single_Race_25yrs_and_over_with_Education_Degree_ACS2016_20',
   'API_Single_Race_25yrs_and_over_with_Other_Field_Degree_ACS2016_20',
   'API_Single_Race_25yrs_and_over_with_STEM_related_Degree_ACS2016_20',
   'API_Single_Race_25yrs_and_over_with_college_or_advanced_degree_ACS2016_20',
   'API_Single_Race_25yrs_and_over_with_high_school_degree_ACS2016_20',
 

In [26]:
data_raw = pd.DataFrame(data_raw.select_dtypes('number'), columns = data_raw.columns)
data_raw = data_raw.drop(columns='Unnamed: 0.1', errors='ignore')

Unnamed: 0,"AirPollutant: 1,3-butadiene_ug/m^3_EPA2018",AirPollutant: Acetaldehyde_ug/m^3_EPA2017,AirPollutant: Acetaldehyde_ug/m^3_EPA2018,AirPollutant: Benzene_ug/m^3_EPA2017,AirPollutant: Benzene_ug/m^3_EPA2018,AirPollutant: Carbon tetrachloride_ug/m^3_EPA2017,AirPollutant: Carbon tetrachloride_ug/m^3_EPA2018,AirPollutant: Formaldehyde_ug/m^3_EPA2017,AirPollutant: Formaldehyde_ug/m^3_EPA2018,Total_population_16yr_And_Over_For_Work_Commute_ACS2017_21,...,White_Single_Race_Male_Percent_of_Diabetes_In_Total_Deaths_CDC2018_22_x,"White_Single_Race_Male_Diabetes_Deaths_Rate_Per_100,000_Residents_CDC2018_22",White_Single_Race_Female_Percent_of_Hypertension_In_Total_Deaths_CDC2018_22_x,"White_Single_Race_Female_Hypertension_Deaths_Rate_Per_100,000_Residents_CDC2018_22",Number_of_hospitals_County_CDCnehTracking_2020,Number_of_Pharmacies_available_per100000_CDCnehTracking_2016_20,Water Area km²,Water Area mi²,Latitude,Longitude
0,0.01,1.50,1.41,0.19,0.18,0.51,0.37,1.84,1.69,25204.0,...,0.0,52.70,0.0,35.50,1.0,11.90,25.776,9.952,32.536382,-86.644490
1,0.01,1.15,1.10,0.17,0.16,0.51,0.37,1.47,1.40,94637.0,...,0.0,34.10,0.0,42.60,4.0,15.00,1133.190,437.527,30.659218,-87.746067
2,0.02,1.51,1.39,0.18,0.17,0.50,0.37,1.83,1.64,8324.0,...,0.0,67.70,0.0,66.70,1.0,15.90,50.865,19.639,31.870670,-85.405456
3,0.01,1.61,1.54,0.18,0.18,0.51,0.37,1.90,1.86,7626.0,...,0.0,72.52,0.0,49.40,1.0,18.00,9.289,3.587,33.015893,-87.127148
4,0.01,1.24,1.19,0.20,0.19,0.50,0.37,1.65,1.59,23240.0,...,0.0,21.00,0.0,53.60,1.0,18.60,15.157,5.852,33.977448,-86.567246
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,0.00,0.32,0.29,0.10,0.10,0.42,0.31,0.51,0.49,20061.0,...,0.0,50.00,0.0,30.90,2.0,11.90,166.887,64.436,41.660339,-108.875676
3139,0.00,0.37,0.30,0.07,0.07,0.41,0.30,0.45,0.42,12795.0,...,0.0,49.06,0.0,48.13,1.0,17.36,572.266,220.953,44.049321,-110.588102
3140,0.00,0.30,0.27,0.08,0.08,0.41,0.30,0.50,0.48,9236.0,...,0.0,58.51,0.0,75.29,2.0,17.49,16.342,6.310,41.284726,-110.558947
3141,0.00,0.41,0.34,0.09,0.10,0.45,0.33,0.54,0.50,3715.0,...,0.0,111.42,0.0,135.87,1.0,42.88,10.762,4.155,43.878831,-107.669052


In [29]:
data_raw.isnull().sum().sum()

0

In [40]:
import networkx as nx

def drop_correlated_columns(df: pd.DataFrame, threshold=0.6):
    """
    Drops columns from groups of highly correlated columns until only one remains per group.

    Parameters:
    - df: pd.DataFrame
        The input DataFrame to analyze and modify.
    - threshold: float
        The correlation threshold above which columns are considered highly correlated.

    Returns:
    - pd.DataFrame
        A modified DataFrame with reduced columns.
    """
    # Compute the correlation matrix
    corr_matrix = df.corr()
    
    # Identify pairs of columns with correlation above the threshold
    correlated_pairs = [
        (col1, col2) 
        for col1 in corr_matrix.columns 
        for col2 in corr_matrix.columns 
        if col1 != col2 and abs(corr_matrix.loc[col1, col2]) > threshold
    ]
    
    # Build a graph where nodes are columns and edges indicate high correlation
    graph = nx.Graph()
    graph.add_edges_from(correlated_pairs)
    
    # Find connected components in the graph (unique groups of correlated columns)
    correlated_groups = list(nx.connected_components(graph))
    
    # For each group, keep only one column (arbitrarily choose the first column in each group)
    columns_to_keep = {list(group)[0] for group in correlated_groups}
    
    # Add any uncorrelated columns (not part of any group)
    uncorrelated_columns = set(df.columns) - set(graph.nodes)
    columns_to_keep.update(uncorrelated_columns)
    columns_to_keep.update(['Longitude', 'Latitude'])
    
    # Convert columns_to_keep to a list and return the reduced DataFrame
    return df[list(columns_to_keep)]

# Apply to your DataFrame
data_raw_reduced = drop_correlated_columns(data_raw, threshold=0.7)
data_raw_reduced.shape


(3143, 308)

In [43]:
data_raw_reduced['Longitude']
data_raw_reduced['Latitude']
data_raw_reduced['Longitude']

0       -86.644490
1       -87.746067
2       -85.405456
3       -87.127148
4       -86.567246
           ...    
3138   -108.875676
3139   -110.588102
3140   -110.558947
3141   -107.669052
3142   -104.570020
Name: Longitude, Length: 3143, dtype: float64

In [45]:
exclude_word = ['cvd', 'death', 'mortality']
data_nocvd = data_raw_reduced.loc[:, ~data_raw_reduced.columns.str.contains('|'.join(exclude_word), case = False)]
data_nocvd

Unnamed: 0,NHPI_Single_Race_Male_25yrs_and_over_with_college_or_advanced_degree_ACS2016_20,Percent_Female_Population_Under_Poverty_ACS2016_20,Strong_Wind_Historic_Loss_Ratio_Buildings_NRI2020,Number_Of_Membership_Associations_Per_10000_Population_2017_CHR,NHPI_Single_Race_Percent_Children_Population_Under_Poverty_ACS2016_20,Asian_Single_Race_Percent_Male_65yrs_Old_And_Over_Population_Under_Poverty_ACS2016_20,White_NH_Single_Race_Percent_Walked_To_Work_ACS2017_21,Asian_Single_Race_Percent_Male_16yrs_And_Over_Without_Earning_ACS2016_20,White_NH_Percent_Single_Race_Population_16_64yrs_Old_Unemployed_ACS2016_20,Heat_Wave_Expected_Annual_Loss_Building_Value_NRI2020,...,AIAN_Single_Race_Percent_Female_In_Production_Transportation_And_Material_Moving_Occupations_ACS2016_20,Black_AA_Single_Race_Percent_Female_65yrs_Old_And_Over_Population_Under_Poverty_ACS2016_20,Cold_Wave_Expected_Annual_Loss_Building_Value_NRI2020,Black_AA_Percent_Single_Race_Male_16_64yrs_Old_In_Labor_Force_Unemployed_ACS2016_20,Percent_Female_Population_In_Service_Occupations_ACS2016_20,Asian_Single_Race_Percent_Female_In_Production_Transportation_And_Material_Moving_Occupations_ACS2016_20,Average_Performance_For_3rd_Graders_Reading_Tests_2016_CHR,Number_of_WIC_Authorized_Store_Per1000_Residents_USDA_2016,Black_AA_Single_Race_Percent_Households_Received_Food_Stamps_In_The_Past_Year_ACS2016_20,AIAN_Single_Race_Percent_Children_Population_Under_Poverty_ACS2016_20
0,0.0,17.41,2.928116e-05,12.071202,0.0,0.0,0.87,39.790,1.87,0.563813,...,100.000,24.01,0.000000,3.82,16.80,9.800,2.959586,0.090511,25.470,0.00
1,0.0,10.62,2.365935e-07,10.205617,0.0,0.0,1.14,17.390,2.27,13.797069,...,10.050,34.67,0.000000,5.07,20.65,0.000,3.005642,0.134802,18.810,21.86
2,0.0,32.04,4.808674e-05,7.518797,0.0,0.0,2.50,11.320,1.63,0.245097,...,8.132,35.47,0.000000,3.13,21.28,0.000,2.660388,0.232387,35.980,0.00
3,0.0,18.26,4.264234e-05,8.381860,0.0,0.0,0.56,100.000,4.21,0.182581,...,0.000,32.67,0.000000,5.11,19.81,0.000,2.628688,0.221474,49.700,0.00
4,0.0,15.71,1.447736e-05,8.446383,0.0,0.0,0.68,7.550,3.54,0.532564,...,0.000,16.88,0.000000,2.92,15.89,100.000,3.025407,0.139089,16.530,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,0.0,13.43,2.134613e-06,10.336748,0.0,0.0,3.02,14.290,5.20,0.000000,...,26.140,0.00,1815.250186,0.00,23.19,0.000,3.163349,0.090344,19.460,0.00
3139,0.0,8.74,1.045941e-05,16.333548,0.0,0.0,10.35,0.000,0.73,0.000000,...,14.580,0.00,274.852624,17.78,24.90,0.000,3.236695,0.129528,0.000,0.00
3140,0.0,10.95,2.784899e-05,2.927543,0.0,0.0,1.61,0.000,2.98,0.000000,...,0.000,0.00,361.109202,0.00,22.67,8.753,3.311837,0.144991,0.000,100.00
3141,0.0,10.55,4.780416e-06,16.121032,0.0,0.0,4.10,28.438,3.86,0.000000,...,11.899,0.00,222.019780,0.00,23.09,30.064,3.510007,0.244858,20.443,0.00


In [53]:
columns_to_scale = [col for col in data_nocvd.columns if col not in ['Longitude', 'Latitude']]
scaler = StandardScaler()
data_scale = scaler.fit_transform(data_nocvd[columns_to_scale])
data_nocvd_scale = data_nocvd.copy()
data_nocvd_scale[columns_to_scale] = data_scale
data_nocvd_scale = data_nocvd_scale.iloc[:-1]
data_nocvd_scale

Unnamed: 0,NHPI_Single_Race_Male_25yrs_and_over_with_college_or_advanced_degree_ACS2016_20,Percent_Female_Population_Under_Poverty_ACS2016_20,Strong_Wind_Historic_Loss_Ratio_Buildings_NRI2020,Number_Of_Membership_Associations_Per_10000_Population_2017_CHR,NHPI_Single_Race_Percent_Children_Population_Under_Poverty_ACS2016_20,Asian_Single_Race_Percent_Male_65yrs_Old_And_Over_Population_Under_Poverty_ACS2016_20,White_NH_Single_Race_Percent_Walked_To_Work_ACS2017_21,Asian_Single_Race_Percent_Male_16yrs_And_Over_Without_Earning_ACS2016_20,White_NH_Percent_Single_Race_Population_16_64yrs_Old_Unemployed_ACS2016_20,Heat_Wave_Expected_Annual_Loss_Building_Value_NRI2020,...,AIAN_Single_Race_Percent_Female_In_Production_Transportation_And_Material_Moving_Occupations_ACS2016_20,Black_AA_Single_Race_Percent_Female_65yrs_Old_And_Over_Population_Under_Poverty_ACS2016_20,Cold_Wave_Expected_Annual_Loss_Building_Value_NRI2020,Black_AA_Percent_Single_Race_Male_16_64yrs_Old_In_Labor_Force_Unemployed_ACS2016_20,Percent_Female_Population_In_Service_Occupations_ACS2016_20,Asian_Single_Race_Percent_Female_In_Production_Transportation_And_Material_Moving_Occupations_ACS2016_20,Average_Performance_For_3rd_Graders_Reading_Tests_2016_CHR,Number_of_WIC_Authorized_Store_Per1000_Residents_USDA_2016,Black_AA_Single_Race_Percent_Households_Received_Food_Stamps_In_The_Past_Year_ACS2016_20,AIAN_Single_Race_Percent_Children_Population_Under_Poverty_ACS2016_20
0,-0.101125,0.277027,0.410697,0.068630,-0.266522,-0.291887,-0.509543,0.482435,-0.930636,-0.084255,...,4.739864,0.634947,-0.151233,-0.151557,-1.153786,-0.090746,-0.327748,-0.618824,0.048442,-0.570328
1,-0.101125,-0.767397,-0.776256,-0.246877,-0.266522,-0.291887,-0.439133,-0.390747,-0.651160,-0.083921,...,-0.093624,1.199262,-0.151233,-0.017290,-0.389896,-0.591049,-0.147636,-0.428858,-0.262078,0.198599
2,-0.101125,2.527382,1.179216,-0.701273,-0.266522,-0.291887,-0.084472,-0.627364,-1.098322,-0.084263,...,-0.196688,1.241612,-0.151233,-0.225673,-0.264896,-0.591049,-1.497838,-0.010313,0.538466,-0.570328
3,-0.101125,0.407772,0.956723,-0.555311,-0.266522,-0.291887,-0.590385,2.829501,0.704299,-0.084265,...,-0.633663,1.093386,-0.151233,-0.012993,-0.556563,-0.591049,-1.621809,-0.057119,1.178154,-0.570328
4,-0.101125,0.015536,-0.194285,-0.544399,-0.266522,-0.291887,-0.559091,-0.774324,0.236176,-0.084256,...,-0.633663,0.257502,-0.151233,-0.248230,-1.334342,4.514082,-0.070337,-0.410470,-0.368382,-0.570328
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,-0.101125,-0.935058,-0.354803,-0.937317,-0.266522,-0.291887,0.510107,-0.164383,0.508666,-0.084269,...,-0.151873,-0.636084,-0.129856,-0.561880,-3.286727,-0.591049,2.129295,-0.147761,-0.944707,-0.570328
3138,-0.101125,-0.335168,-0.698691,-0.224701,-0.266522,-0.291887,0.051134,-0.511589,1.396002,-0.084269,...,0.770977,-0.636084,-0.045036,-0.561880,0.114073,-0.591049,0.469120,-0.619538,-0.231772,-0.570328
3139,-0.101125,-1.056574,-0.358485,0.789478,-0.266522,-0.291887,1.962653,-1.068633,-1.727143,-0.084269,...,0.149797,-0.636084,-0.135153,1.347946,0.453360,-0.591049,0.755955,-0.451478,-1.139085,-0.570328
3140,-0.101125,-0.716637,0.352169,-1.477745,-0.266522,-0.291887,-0.316566,-1.068633,-0.155090,-0.084269,...,-0.633663,-0.636084,-0.130107,-0.561880,0.010899,-0.144197,1.049820,-0.385159,-1.139085,2.947179


In [51]:
response = pd.read_csv('response.csv')
response

Unnamed: 0,CVD death 2019_2020 std
0,0.507612
1,-0.037906
2,2.516687
3,1.692067
4,1.229588
...,...
3137,-0.094418
3138,-2.319163
3139,0.220437
3140,-0.330848


In [70]:
coords = data_nocvd_scale[['Longitude', 'Latitude']].values
predictor = data_nocvd_scale.drop(columns=['Longitude', 'Latitude'], errors='ignore')
predictor


Unnamed: 0,NHPI_Single_Race_Male_25yrs_and_over_with_college_or_advanced_degree_ACS2016_20,Percent_Female_Population_Under_Poverty_ACS2016_20,Strong_Wind_Historic_Loss_Ratio_Buildings_NRI2020,Number_Of_Membership_Associations_Per_10000_Population_2017_CHR,NHPI_Single_Race_Percent_Children_Population_Under_Poverty_ACS2016_20,Asian_Single_Race_Percent_Male_65yrs_Old_And_Over_Population_Under_Poverty_ACS2016_20,White_NH_Single_Race_Percent_Walked_To_Work_ACS2017_21,Asian_Single_Race_Percent_Male_16yrs_And_Over_Without_Earning_ACS2016_20,White_NH_Percent_Single_Race_Population_16_64yrs_Old_Unemployed_ACS2016_20,Heat_Wave_Expected_Annual_Loss_Building_Value_NRI2020,...,AIAN_Single_Race_Percent_Female_In_Production_Transportation_And_Material_Moving_Occupations_ACS2016_20,Black_AA_Single_Race_Percent_Female_65yrs_Old_And_Over_Population_Under_Poverty_ACS2016_20,Cold_Wave_Expected_Annual_Loss_Building_Value_NRI2020,Black_AA_Percent_Single_Race_Male_16_64yrs_Old_In_Labor_Force_Unemployed_ACS2016_20,Percent_Female_Population_In_Service_Occupations_ACS2016_20,Asian_Single_Race_Percent_Female_In_Production_Transportation_And_Material_Moving_Occupations_ACS2016_20,Average_Performance_For_3rd_Graders_Reading_Tests_2016_CHR,Number_of_WIC_Authorized_Store_Per1000_Residents_USDA_2016,Black_AA_Single_Race_Percent_Households_Received_Food_Stamps_In_The_Past_Year_ACS2016_20,AIAN_Single_Race_Percent_Children_Population_Under_Poverty_ACS2016_20
0,-0.101125,0.277027,0.410697,0.068630,-0.266522,-0.291887,-0.509543,0.482435,-0.930636,-0.084255,...,4.739864,0.634947,-0.151233,-0.151557,-1.153786,-0.090746,-0.327748,-0.618824,0.048442,-0.570328
1,-0.101125,-0.767397,-0.776256,-0.246877,-0.266522,-0.291887,-0.439133,-0.390747,-0.651160,-0.083921,...,-0.093624,1.199262,-0.151233,-0.017290,-0.389896,-0.591049,-0.147636,-0.428858,-0.262078,0.198599
2,-0.101125,2.527382,1.179216,-0.701273,-0.266522,-0.291887,-0.084472,-0.627364,-1.098322,-0.084263,...,-0.196688,1.241612,-0.151233,-0.225673,-0.264896,-0.591049,-1.497838,-0.010313,0.538466,-0.570328
3,-0.101125,0.407772,0.956723,-0.555311,-0.266522,-0.291887,-0.590385,2.829501,0.704299,-0.084265,...,-0.633663,1.093386,-0.151233,-0.012993,-0.556563,-0.591049,-1.621809,-0.057119,1.178154,-0.570328
4,-0.101125,0.015536,-0.194285,-0.544399,-0.266522,-0.291887,-0.559091,-0.774324,0.236176,-0.084256,...,-0.633663,0.257502,-0.151233,-0.248230,-1.334342,4.514082,-0.070337,-0.410470,-0.368382,-0.570328
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,-0.101125,-0.935058,-0.354803,-0.937317,-0.266522,-0.291887,0.510107,-0.164383,0.508666,-0.084269,...,-0.151873,-0.636084,-0.129856,-0.561880,-3.286727,-0.591049,2.129295,-0.147761,-0.944707,-0.570328
3138,-0.101125,-0.335168,-0.698691,-0.224701,-0.266522,-0.291887,0.051134,-0.511589,1.396002,-0.084269,...,0.770977,-0.636084,-0.045036,-0.561880,0.114073,-0.591049,0.469120,-0.619538,-0.231772,-0.570328
3139,-0.101125,-1.056574,-0.358485,0.789478,-0.266522,-0.291887,1.962653,-1.068633,-1.727143,-0.084269,...,0.149797,-0.636084,-0.135153,1.347946,0.453360,-0.591049,0.755955,-0.451478,-1.139085,-0.570328
3140,-0.101125,-0.716637,0.352169,-1.477745,-0.266522,-0.291887,-0.316566,-1.068633,-0.155090,-0.084269,...,-0.633663,-0.636084,-0.130107,-0.561880,0.010899,-0.144197,1.049820,-0.385159,-1.139085,2.947179


In [64]:
#coords = coords.to_numpy()
predictor = predictor.to_numpy()
response = response.to_numpy()

In [65]:
from mgwr.gwr import GWR
from mgwr.sel_bw import Sel_BW

selector = Sel_BW(coords, response, predictor)
optimal_bandwidth = selector.search()
gwr_model = GWR(coords, response, predictor, bw=optimal_bandwidth)
gwr_results = gwr_model.fit()

  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
  xtx_inv_xt = linalg.solve(xtx, xT)
 

In [62]:
print(coords.shape)
print(predictor.shape)
print(response.shape)


(3142, 2)
(3142, 295)
(3142, 1)


In [83]:
# Summary of the model
print(gwr_results.summary)

# # Access local coefficients
# local_coefficients = gwr_results.params

# # Example: Visualize coefficients for a predictor
# import matplotlib.pyplot as plt

# plt.scatter(coords[:, 0], coords[:, 1], c=local_coefficients[:, 2], cmap='coolwarm')
# plt.colorbar(label='Coefficient for predictor1')
# plt.xlabel('Longitude')
# plt.ylabel('Latitude')
# plt.title('Spatial Variation in Predictor1 Coefficients')
# plt.show()
gwr_results.summary()

<bound method GWRResults.summary of <mgwr.gwr.GWRResults object at 0x164b7b070>>
Model type                                                         Gaussian
Number of observations:                                                3142
Number of covariates:                                                   295

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                           1842.811
Log-likelihood:                                                   -3620.070
AIC:                                                               7830.141
AICc:                                                              7893.942
BIC:                                                             -21082.983
R2:                                                                   0.413
Adj. R2:                                                              0.353

Variable                              Est.         SE  

In [82]:
import pandas as pd

# Step 1: Map the generic variable names to original column names
original_columns = predictor.columns.tolist()
index_to_column = {f"X{i}": col for i, col in enumerate(original_columns)}

# Step 2: Update variable names in the GWR results table
# Extract the GWR parameter estimates
params = gwr_results.params
t_values = gwr_results.tvalues

# Create a DataFrame for the parameter estimates and t-values
results_df = pd.DataFrame(params, columns=[index_to_column[f"X{i}"] for i in range(params.shape[1])])
tvalues_df = pd.DataFrame(t_values, columns=[index_to_column[f"X{i}"] for i in range(t_values.shape[1])])

# Step 3: Add summary statistics for coefficients
mean_estimates = results_df.mean().abs().sort_values(ascending=False)
summary_table = pd.DataFrame({
    "Variable": mean_estimates.index,
    "Mean Estimate (|coeff|)": mean_estimates.values
})

# Step 4: Generate and Display Results
print("\n--- GWR Results (With Original Column Names) ---\n")
print("Summary Statistics for Coefficients:")
summary_table.head(20)



--- GWR Results (With Original Column Names) ---

Summary Statistics for Coefficients:


Unnamed: 0,Variable,Mean Estimate (|coeff|)
0,Black_AA_Single_Race_Percent_Female_In_Service...,137.606982
1,Percent_of_Farmers_Market_Accept_SFMNP_USDA_2018,136.130121
2,Asian_Single_Race_Percent_Female_18_64yrs_Old_...,132.814423
3,Water Area mi²,114.145831
4,AIAN_Single_Race_Percent_Male_In_Natural_Resou...,113.087703
5,Asian_Single_Race_Percent_Male_16yrs_And_Over_...,110.05809
6,AIAN_Single_Race_Percent_Male_In_Management_Bu...,107.607764
7,Asian_Percent_Single_Race_Male_16_64yrs_Old_In...,105.402892
8,Asian_Single_Race_Percent_Male_In_Production_T...,102.842991
9,Black_AA_Single_Race_Percent_Female_16yrs_And_...,102.329069
