In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr, spearmanr, kendalltau
from sklearn.model_selection import train_test_split

# Read the input CSV (decoded as Latin-1)
df = pd.read_csv(
    r'C:/Users/krist/OneDrive/Desktop/DV CODES/201621VERTotal neighborhood profles REG.csv',
    encoding='latin1',
)

# Preview the first rows as a quick sanity check
print(df.head())

# Columns that are each analysed individually
variables = ['LOW INCOME', 'NO CERTIFICATE', 'UNEMPLOYMENT', 'POPULATION']

# Column-oriented accumulator for correlation results: every key maps to
# its own (initially empty) list of values
finalcorrelation_results = {
    column: []
    for column in (
        'Variable',
        'Pearson Correlation',
        'Pearson P-value',
        'Spearman Correlation',
        'Spearman P-value',
        'Kendall Tau Correlation',
        'Kendall Tau P-value',
    )
}

# Column-oriented accumulator for regression results
finalregression_results = {
    column: []
    for column in (
        'Variable',
        'Mean Squared Error',
        'R-squared',
        'Coefficient',
        'Intercept',
    )
}

# Column-oriented accumulator for actual vs. predicted values
finalpredictions_results = {
    column: []
    for column in ('Variable', 'Actual', 'Predicted')
}

# Perform correlation and regression analysis, one predictor at a time.
# For each variable this appends one row to the correlation and regression
# accumulators, one row per test sample to the predictions accumulator, and
# saves a scatter plot with the fitted line as a PNG.
for var in variables:
    # Keep only rows where both the predictor and CRIMES are present.
    # scikit-learn's LinearRegression rejects NaN input, and missing values
    # would also propagate into (or break) the correlation statistics.
    # When the data has no missing values this leaves the rows unchanged.
    complete_rows = df[[var, 'CRIMES']].dropna()
    X = complete_rows[[var]]
    y = complete_rows['CRIMES']

    # Correlation analysis on all complete rows (not only the test split)
    finalcorrelation_results['Variable'].append(var)
    for label, corr_func in (
        ('Pearson', pearsonr),
        ('Spearman', spearmanr),
        ('Kendall Tau', kendalltau),
    ):
        corr, p_value = corr_func(complete_rows[var], y)
        finalcorrelation_results[f'{label} Correlation'].append(corr)
        finalcorrelation_results[f'{label} P-value'].append(p_value)

    # Linear regression analysis: hold out 20% of the rows for testing;
    # the fixed random_state keeps the split reproducible between runs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Create and train the model on the training set
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predictions on the test set
    y_pred = model.predict(X_test)

    # Store actual and predicted values, tagging every row with the variable
    finalpredictions_results['Variable'].extend([var] * len(y_test))
    finalpredictions_results['Actual'].extend(y_test)
    finalpredictions_results['Predicted'].extend(y_pred)

    # Calculating the performance metrics on the held-out test set
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    finalregression_results['Variable'].append(var)
    finalregression_results['Mean Squared Error'].append(mse)
    finalregression_results['R-squared'].append(r2)
    # Single-feature model, so coef_ holds exactly one slope
    finalregression_results['Coefficient'].append(model.coef_[0])
    finalregression_results['Intercept'].append(model.intercept_)

    # Scatter of the test points with the fitted regression line on top
    plt.figure(figsize=(10, 6))
    plt.scatter(X_test, y_test, color='blue', label='Actual')
    plt.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted')
    plt.xlabel(var)
    plt.ylabel('CRIMES')
    plt.title(f'Scatter Plot and Regression Line for {var} vs CRIMES')
    plt.legend()
    plt.savefig(f'C:/Users/krist/OneDrive/Desktop/DV CODES/finalscatterplot_{var}.png')
    # Close the figure so open figures do not accumulate across iterations
    plt.close()

# Turn each accumulated result dictionary into a DataFrame
finalcorrelation_results_df = pd.DataFrame(data=finalcorrelation_results)
finalregression_results_df = pd.DataFrame(data=finalregression_results)
finalpredictions_results_df = pd.DataFrame(data=finalpredictions_results)

# Write every table to its own CSV file, leaving out the index column
for _frame, _csv_path in (
    (
        finalcorrelation_results_df,
        r'C:/Users/krist/OneDrive/Desktop/DV CODES/finalcorrelation_results.csv',
    ),
    (
        finalregression_results_df,
        r'C:/Users/krist/OneDrive/Desktop/DV CODES/finalregression_results.csv',
    ),
    (
        finalpredictions_results_df,
        r'C:/Users/krist/OneDrive/Desktop/DV CODES/finalpredictions_results.csv',
    ),
):
    _frame.to_csv(_csv_path, index=False)

