## General methods

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import gdown
import datetime as dt
import sqlite3
import kaggle
import scipy.stats as sts
import statsmodels.api as sm
import pylab
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

### Download Kaggle dataset

In [None]:
def kaggle_dataset_download(kaggle_path, kaggle_path_Name, kaggle_zip_file):
    """Authenticate with the Kaggle API and download a dataset's files.

    Args:
        kaggle_path: Forwarded as the first positional argument of
            ``kaggle.api.dataset_download_files`` (presumably the dataset
            identifier -- confirm against the Kaggle API docs).
        kaggle_path_Name: Forwarded as the second positional argument
            (presumably the local destination path -- confirm).
        kaggle_zip_file: When truthy, the download is requested with
            ``unzip=True``; otherwise no ``unzip`` argument is passed.
    """
    kaggle.api.authenticate()
    # Only add ``unzip`` when extraction was asked for, so the library's own
    # default applies in the other case.
    extra_options = {"unzip": True} if kaggle_zip_file else {}
    kaggle.api.dataset_download_files(kaggle_path, kaggle_path_Name, **extra_options)

### General Methods

In [None]:
def get_nulls_data(df):
    """Summarise data quality as per-column null counts and non-null share.

    Args:
        df: The DataFrame to inspect.

    Returns:
        A DataFrame indexed by the column names of ``df``, ordered by null
        count (highest first), with two columns:
        ``Total`` -- number of null values in the column;
        ``PercNotNull`` -- 100 minus the null percentage (the null
        percentage is rounded to 2 decimals before subtracting).
    """
    null_counts = df.isnull().sum().sort_values(ascending=False)
    null_share = (null_counts / len(df) * 100).round(2)
    not_null_share = 100 - null_share
    # Label the two series directly while placing them side by side.
    return pd.concat(
        [null_counts, not_null_share],
        axis=1,
        keys=["Total", "PercNotNull"],
    )

In [None]:
def get_compound_acceptance_index(row):
    """Translate a row's ``Sentiment`` label into a numeric score.

    Args:
        row: Any mapping-like object supporting ``row['Sentiment']``.

    Returns:
        1 / 0.5 / 0 / -0.5 / -1 for 'Extremely Positive' / 'Positive' /
        'Neutral' / 'Negative' / 'Extremely Negative' respectively, and 0
        for any other label.
    """
    label_scores = (
        ('Extremely Positive', 1),
        ('Positive', 0.5),
        ('Neutral', 0),
        ('Negative', -0.5),
        ('Extremely Negative', -1),
    )
    sentiment = row['Sentiment']
    for label, score in label_scores:
        if sentiment == label:
            return score
    # Unrecognised labels are scored as neutral.
    return 0

In [None]:
def ttest_hypothesis_determination(pval, p_alpha):
    """Print the verdict of a t-test given its p-value and significance level.

    H0 is rejected only when ``pval`` is strictly smaller than ``p_alpha``.
    The printed message reports the confidence (``100 * (1 - p_alpha)``)
    and the significance (``100 * p_alpha``) as percentages.

    Args:
        pval: p-value obtained from the test.
        p_alpha: Significance level as a fraction (e.g. 0.05 for 5%,
            which corresponds to 95% confidence).
    """
    significance_pct = 100 * p_alpha
    confidence_pct = 100 * (1 - p_alpha)

    reject_template = "I have enough evidence to reject H0. Therefore, I assume H1 with a confidence of {0}% and significance of {1}%"
    keep_template = "I don't have enough evidence to reject H0. So we accept is true with a confidence of {0}% and significance of {1}%"

    verdict = reject_template if pval < p_alpha else keep_template
    print(verdict.format(confidence_pct, significance_pct))

In [None]:
def build_geodf(df, lat_col_name='latitude', lon_col_name='longitude'):
    """Build a GeoDataFrame with point geometry from coordinate columns.

    The input frame is copied first, so the caller's ``df`` is not the
    object handed to ``gpd.GeoDataFrame``.

    Args:
        df: Source frame containing the coordinate columns.
        lat_col_name: Name of the latitude column.
        lon_col_name: Name of the longitude column.

    Returns:
        A ``gpd.GeoDataFrame`` over the copied data whose geometry comes
        from ``gpd.points_from_xy(longitude, latitude)``.
    """
    frame = df.copy()
    latitudes, longitudes = frame[lat_col_name], frame[lon_col_name]
    # points_from_xy takes x (longitude) first, then y (latitude).
    points = gpd.points_from_xy(longitudes, latitudes)
    return gpd.GeoDataFrame(frame, geometry=points)

In [None]:
def get_clinical_dementia_rating(row):
    """Translate a row's numeric ``CDR`` value into a descriptive label.

    Reference:
    https://www.sciencedirect.com/topics/medicine-and-dentistry/clinical-dementia-rating

    Args:
        row: Any mapping-like object supporting ``row['CDR']``.

    Returns:
        "Normal" for 0, "Very Mild Dementia" for 0.5, "Mild Dementia" for 1,
        "Moderate Dementia" for 2, "Severe Dementia" for 3, 4 or 5, and
        "Normal" for any other value.
    """
    rating_labels = (
        (0, "Normal"),
        (0.5, "Very Mild Dementia"),
        (1, "Mild Dementia"),
        (2, "Moderate Dementia"),
        (3, "Severe Dementia"),
        (4, "Severe Dementia"),
        (5, "Severe Dementia"),
    )
    cdr = row['CDR']
    for rating, label in rating_labels:
        if cdr == rating:
            return label
    # Values outside the table fall back to "Normal".
    return "Normal"

In [None]:
def chi_square_hypothesis_determination(p_alpha, p_chi2, p_dof, pval, p_critical_value):
    """Print the verdict of a chi-square independence test in two ways.

    Two checks are printed, each with the same supporting figures:

    1. statistic vs. critical value: H0 is rejected when
       ``abs(p_chi2) >= p_critical_value``;
    2. p-value vs. significance: H0 is rejected when ``pval <= p_alpha``.

    Only the function's own arguments are used; no notebook-level variables
    are read.

    Args:
        p_alpha: Significance level as a fraction (e.g. 0.05 for 5%,
            which corresponds to 95% confidence).
        p_chi2: Chi-square test statistic.
        p_dof: Degrees of freedom of the test.
        pval: p-value of the test.
        p_critical_value: Critical value to compare the statistic against.
    """
    confidence_perc = 100 * (1 - p_alpha)
    p_alpha_perc = 100 * p_alpha

    str_H1 = "I have enough evidence to reject H0 (There is a relationship between the categorical variables). Therefore, I assume H1 with a confidence of {0}%, significance of {1}%, statistic of {2}, degree of freedom of {3}, p_value of {4} and  crital value of {5}."
    str_H0 = "I don't have enough evidence to reject H0 (There is no relationship between 2 categorical variables). So we accept is true with a confidence of {0}%, significance of {1}%, statistic of {2}, degree of freedom of {3}, p_value of {4} and  crital value of {5}."

    # The same figures are reported in every message, in template order.
    report_values = (confidence_perc, p_alpha_perc, p_chi2, p_dof, pval, p_critical_value)

    print("Validation_1:\n")
    verdict = str_H1 if abs(p_chi2) >= p_critical_value else str_H0
    print("Validating chi2>=critical_value: " + verdict.format(*report_values))

    print("\nValidation_2:\n")
    verdict = str_H1 if pval <= p_alpha else str_H0
    print("Validating pval<=alpha: " + verdict.format(*report_values))