In [1]:
# import reqs
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import datetime, statsmodels, warnings
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose 
warnings.simplefilter("ignore")
from math import sqrt
from datetime import date, timedelta
from matplotlib.ticker import StrMethodFormatter

In [33]:
state = input("State:")
county = input("County:")

State: California
County: San Diego


In [34]:
num_heating_days_state = pd.read_csv('processed_data/num_heating_days_state.csv')
num_cooling_days_state = pd.read_csv('processed_data/num_cooling_days_state.csv')
preciptation_state = pd.read_csv('processed_data/preciptation_state.csv')
temperature_avg_state = pd.read_csv('processed_data/temperature_avg_state.csv')
temperature_max_state = pd.read_csv('processed_data/temperature_max_state.csv')
temperature_min_state = pd.read_csv('processed_data/temperature_min_state.csv')

# make the data frame
num_heating_days_state = pd.DataFrame(num_heating_days_state)
num_cooling_days_state = pd.DataFrame(num_cooling_days_state)
preciptation_state = pd.DataFrame(preciptation_state)
temperature_avg_state = pd.DataFrame(temperature_avg_state)
temperature_max_state = pd.DataFrame(temperature_max_state)
temperature_min_state = pd.DataFrame(temperature_min_state)

In [35]:
def select_data(df, val1, val2):
    """Helper function to match the State Name and County"""
    result = df[(df["StateName"] == val1) & (df["name"].str.contains(val2 + " County"))]
    if result.empty:
        return "Error no matching data was found"
    else:
        return result

heat = select_data(num_heating_days_state, state, county)
cool = select_data(num_cooling_days_state, state, county)
precip = select_data(preciptation_state, state, county)
temp_a = select_data(temperature_avg_state, state, county)
temp_max = select_data(temperature_max_state, state, county)
temp_min = select_data(temperature_min_state, state, county)

def basic_stats(df, year, *columns):
    """Function calculates mean and std dev, then uses that to calcualte z-scores to find how far
    from the mean each value is, will return the z-scores"""
    relevant_df = df[['year'] + list(columns)]
    stats = relevant_df.describe().loc[['mean', 'std']]
    z_scores = (relevant_df.drop(columns=['year']) - stats.loc['mean']) / stats.loc['std']
    z_scores['year'] = relevant_df['year']
    cols = ['year'] + list(columns)
    z_scores = z_scores[cols]
    return z_scores

heater_z = pd.DataFrame(basic_stats(heat, "year", "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec")).fillna(0)
cooler_z = pd.DataFrame(basic_stats(cool, "year", "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec")).fillna(0)
precip_z = pd.DataFrame(basic_stats(precip, "year", "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec")).fillna(0)
temp_a_z = pd.DataFrame(basic_stats(temp_a, "year", "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec")).fillna(0)
temp_max_z = pd.DataFrame(basic_stats(temp_max, "year", "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec")).fillna(0)
temp_min_z = pd.DataFrame(basic_stats(temp_min, "year", "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sept", "oct", "nov", "dec")).fillna(0)

In [40]:
def prep_time_series(df, value_name='value'):
    """Transform to a time series with a single column."""
    df_long = df.melt(id_vars=['year'], var_name='month', value_name=value_name)
    month_to_num = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
                    'jul': 7, 'aug': 8, 'sept': 9, 'oct': 10, 'nov': 11, 'dec': 12}
    df_long['month'] = df_long['month'].map(month_to_num)
    df_long['date'] = pd.to_datetime(df_long[['year', 'month']].assign(DAY=1))
    df_long = df_long.sort_values('date')
    df_long.set_index('date', inplace=True)
    df_long.drop(['year', 'month'], axis=1, inplace=True)
    return df_long

def process_and_model(df, value_name='value'):
    """Process the dataframe and apply the SARIMAX model to calculate the slope."""
    df_prep = prep_time_series(df, value_name)

    # Splitting into train and validation sets
    Train = df_prep[df_prep.index.year < 1980].reset_index()
    Valid = df_prep[df_prep.index.year >= 1980].reset_index()

    window_size = 12
    # Calculate rolling mean and std for Train and Valid sets
    for dataset in [Train, Valid]:
        dataset['rolling_mean'] = dataset[value_name].rolling(window=window_size).mean().fillna(0)
        dataset['rolling_std'] = dataset[value_name].rolling(window=window_size).std().fillna(0)
    
    # Fit SARIMAX model
    model = SARIMAX(Train[value_name], exog=Train[['rolling_mean', 'rolling_std']], 
                    order=(1, 0, 1), seasonal_order=(1, 1, 1, 12))
    results = model.fit()

    # Predict on the validation set
    predictions = results.get_prediction(start=Valid.index[0], end=Valid.index[-1], 
                                         exog=Valid[['rolling_mean', 'rolling_std']])
    Valid['predictions'] = predictions.predicted_mean

    slope = calculate_slope_from_predictions(Valid, value_name)
    return slope

def calculate_slope_from_predictions(Valid, value_name):
    """Calculate the slope from SARIMAX model predictions."""
    last_test_date = Valid['date'].iloc[-1]
    prediction_dates = pd.date_range(start=last_test_date + pd.Timedelta(days=1), periods=len(Valid), freq='MS')
    prediction_dates_ordinal = np.array([d.toordinal() for d in prediction_dates])
    slope, intercept = np.polyfit(prediction_dates_ordinal, Valid['predictions'], 1)
    return slope

datasets = [heater_z, cooler_z, precip_z, temp_a_z, temp_max_z, temp_min_z]
dataset_names = ['heater_z', 'cooler_z', 'precip_z', 'temp_a_z', 'temp_max_z', 'temp_min_z']
slope_values = {name: process_and_model(dataset, 'z-score') for dataset, name in zip(datasets, dataset_names)}

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            7     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.46226D+00    |proj g|=  3.19318D-01

At iterate    5    f=  1.36594D+00    |proj g|=  3.33358D-02

At iterate   10    f=  1.35063D+00    |proj g|=  4.74876D-03

At iterate   15    f=  1.35041D+00    |proj g|=  1.87813D-03

At iterate   20    f=  1.35040D+00    |proj g|=  3.08411D-04

At iterate   25    f=  1.35040D+00    |proj g|=  1.55833D-04

At iterate   30    f=  1.35040D+00    |proj g|=  2.31779D-04

At iterate   35    f=  1.35040D+00    |proj g|=  1.03441D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function 

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            7     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.15946D+00    |proj g|=  4.14153D-01

At iterate    5    f=  1.08511D+00    |proj g|=  5.17583D-02

At iterate   10    f=  1.05785D+00    |proj g|=  1.34812D-02

At iterate   15    f=  1.05519D+00    |proj g|=  7.82260D-03

At iterate   20    f=  1.05497D+00    |proj g|=  1.12254D-03

At iterate   25    f=  1.05491D+00    |proj g|=  2.10443D-03

At iterate   30    f=  1.05487D+00    |proj g|=  3.62468D-04

At iterate   35    f=  1.05487D+00    |proj g|=  4.67624D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function 

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            7     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.51991D+00    |proj g|=  2.80844D-01

At iterate    5    f=  1.40484D+00    |proj g|=  3.98987D-02

At iterate   10    f=  1.39060D+00    |proj g|=  4.11256D-03

At iterate   15    f=  1.38941D+00    |proj g|=  1.31748D-03

At iterate   20    f=  1.38667D+00    |proj g|=  2.03267D-02

At iterate   25    f=  1.37158D+00    |proj g|=  2.69821D-02

At iterate   30    f=  1.36632D+00    |proj g|=  1.76597D-02

At iterate   35    f=  1.36492D+00    |proj g|=  5.79839D-03

At iterate   40    f=  1.36490D+00    |proj g|=  1.72599D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg 

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            7     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.13998D+00    |proj g|=  4.40348D-01

At iterate    5    f=  1.05575D+00    |proj g|=  4.24852D-02

At iterate   10    f=  1.02833D+00    |proj g|=  1.28598D-02

At iterate   15    f=  1.02588D+00    |proj g|=  1.46008D-03

At iterate   20    f=  1.02574D+00    |proj g|=  1.18600D-02

At iterate   25    f=  1.02567D+00    |proj g|=  2.05369D-03

At iterate   30    f=  1.02565D+00    |proj g|=  2.23532D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nac

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            7     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.19422D+00    |proj g|=  4.13223D-01

At iterate    5    f=  1.10043D+00    |proj g|=  6.81316D-02

At iterate   10    f=  1.08808D+00    |proj g|=  1.45574D-02

At iterate   15    f=  1.08495D+00    |proj g|=  1.55520D-03

At iterate   20    f=  1.08493D+00    |proj g|=  1.81387D-04

At iterate   25    f=  1.08493D+00    |proj g|=  4.80506D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    7     27     33      1     0     0   

 This problem is unconstrained.



At iterate    5    f=  1.08683D+00    |proj g|=  7.62729D-02

At iterate   10    f=  1.05909D+00    |proj g|=  1.42598D-02

At iterate   15    f=  1.05674D+00    |proj g|=  6.09502D-03

At iterate   20    f=  1.05603D+00    |proj g|=  3.91097D-03

At iterate   25    f=  1.05592D+00    |proj g|=  3.74581D-03

At iterate   30    f=  1.05587D+00    |proj g|=  2.82822D-03

At iterate   35    f=  1.05583D+00    |proj g|=  2.69739D-04

At iterate   40    f=  1.05582D+00    |proj g|=  1.11567D-03

At iterate   45    f=  1.05582D+00    |proj g|=  1.21450D-04

At iterate   50    f=  1.05582D+00    |proj g|=  8.96578D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tn

In [41]:
# Define weights for the datasets
weights = {
    'heater_z': 10,
    'cooler_z': 10,
    'precip_z': 30,
    'temp_a_z': 25,
    'temp_max_z': 12.5,
    'temp_min_z': 12.5
}

# Calculate the composite metric for climatological stability
max_slope = max(abs(slope) for slope in slope_values.values())
composite_metric = sum((abs(slope_values[name]) / max_slope) * 100 * (weights[name] / 100) for name in dataset_names)

# Ensure the composite metric is within the desired range
composite_metric = min(composite_metric, 100)

# Print out the composite metric and slope values
print(f"Composite Metric for Climatological Stability: {composite_metric:.2f}")
for name, slope in slope_values.items():
    print(f"{name} Slope: {slope:.10f}")

Composite Metric for Climatological Stability: 47.08
heater_z Slope: -0.0000105119
cooler_z Slope: 0.0000041787
precip_z Slope: 0.0000180844
temp_a_z Slope: -0.0000003891
temp_max_z Slope: -0.0000048523
temp_min_z Slope: 0.0000073317
