In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the two input tables from the datathon data directory.
state_data = pd.read_csv('/Datathon Data/acs_5yr_est_selected_economic_characteristics_2010-2022.csv')
stock_data = pd.read_csv('/Datathon Data/all_stock_and_etfs.csv')

# Preprocessing: parse the date-like columns into pandas datetimes.
# 'YearStart' is parsed with a year-only format, so every 'Year' value is
# midnight on 1 January of that year.
state_data['Year'] = pd.to_datetime(state_data['YearStart'], format='%Y')
stock_data['Date-Time'] = pd.to_datetime(stock_data['Date-Time'])

# Merge datasets on the calendar year.
# Joining 'Year' to 'Date-Time' on exact timestamp equality would keep only
# stock rows stamped exactly 1 January 00:00, which silently discards almost
# all (possibly all) of the stock history. Instead, pair each state row with
# every stock row that falls in the same year. A temporary key column is used
# so that 'Year' and 'Date-Time' both survive unchanged in the result.
# NOTE(review): this is a many-to-many join within each year, so the result
# can be much larger than either input -- confirm it fits in memory.
merged_data = pd.merge(
    state_data.assign(_merge_year=state_data['Year'].dt.year),
    stock_data.assign(_merge_year=stock_data['Date-Time'].dt.year),
    on='_merge_year',
).drop(columns='_merge_year')

# Example correlation analysis
# Pairwise correlation (DataFrame.corr defaults) between the state value
# column and the stock price/volume columns, computed over all merged rows.
relevant_columns = ['Data_Value', 'Open', 'High', 'Low', 'Close', 'Volume']
correlation_matrix = merged_data.loc[:, relevant_columns].corr()

# Plot the correlation matrix as an annotated heatmap; the colour scale is
# pinned to [-1, 1] so that zero correlation sits at the middle of the map.
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    vmin=-1,
    vmax=1,
    cmap='coolwarm',
    annot=True,
)
plt.title('Correlation Matrix between Stock Values and State Metrics')
plt.show()

# Detailed analysis for each state and stock
# For every (state, ticker) pair present in the merged data, compute the
# correlation between each state metric and each stock metric, and collect
# one result row per metric pair.
state_metrics = ['Data_Value']  # Add other relevant metrics from the state dataset
stock_metrics = ['Open', 'High', 'Low', 'Close', 'Volume']

# Tickers in order of first appearance in the stock data; iterating in this
# order keeps the output rows in a stable, predictable order within a state.
ticker_order = stock_data['Ticker_Symbol'].unique()

results = []

# Group once per level instead of re-filtering the whole frame with a boolean
# mask for every state and again for every ticker. sort=False keeps states in
# order of first appearance in the merged data.
for state, state_subset in merged_data.groupby('LocationDesc', sort=False):
    subsets_by_ticker = {
        ticker: rows
        for ticker, rows in state_subset.groupby('Ticker_Symbol', sort=False)
    }
    for stock in ticker_order:
        stock_subset = subsets_by_ticker.get(stock)
        if stock_subset is None:
            # This ticker has no merged rows for this state.
            continue
        for state_metric in state_metrics:
            for stock_metric in stock_metrics:
                corr = stock_subset[state_metric].corr(stock_subset[stock_metric])
                results.append({
                    'State': state,
                    'Stock': stock,
                    'State Metric': state_metric,
                    'Stock Metric': stock_metric,
                    'Correlation': corr
                })

results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_df.to_csv('/mnt/data/stock_state_correlation_results.csv', index=False)

# Visualize a few examples
# One figure per (state, ticker) pair, limited to the first 3 states and the
# first 3 tickers, showing every state metric and every stock metric over time.
example_states = merged_data['LocationDesc'].unique()[:3]
example_stocks = stock_data['Ticker_Symbol'].unique()[:3]

for state in example_states:
    # Filter by state once per state rather than once per (state, ticker).
    state_subset = merged_data[merged_data['LocationDesc'] == state]
    for stock in example_stocks:
        stock_subset = state_subset[state_subset['Ticker_Symbol'] == stock]
        if stock_subset.empty:
            continue

        # Line plots connect points in row order, so order the rows by time.
        stock_subset = stock_subset.sort_values('Date-Time')

        plt.figure(figsize=(12, 6))
        # Draw each state metric exactly once. Plotting it inside the stock
        # metric loop would repeat the same line and legend entry once per
        # stock metric.
        for state_metric in state_metrics:
            plt.plot(stock_subset['Date-Time'], stock_subset[state_metric], label=f'{state_metric}')
        # NOTE(review): all series share one y-axis; presumably 'Volume' is on
        # a much larger scale than the price columns -- consider a secondary
        # axis if the price lines end up flattened.
        for stock_metric in stock_metrics:
            plt.plot(stock_subset['Date-Time'], stock_subset[stock_metric], label=f'{stock_metric}')
        plt.title(f'Time Series Plot for {state} and {stock}')
        plt.legend()
        plt.show()
