## General methods

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import gdown
import datetime as dt
import sqlite3
import kaggle
import scipy.stats as sts
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

### Download Kaggle dataset

In [None]:
def kaggle_dataset_download(kaggle_path, kaggle_path_Name, kaggle_zip_file):
    """Authenticate against Kaggle and download the files of a dataset.

    Args:
        kaggle_path: Dataset identifier, forwarded as the first positional
            argument of ``kaggle.api.dataset_download_files``.
        kaggle_path_Name: Forwarded as the second positional argument of
            ``kaggle.api.dataset_download_files`` (presumably the local
            download directory -- confirm against the Kaggle client docs).
        kaggle_zip_file: When truthy, the download is requested with
            ``unzip=True``; otherwise the client's default is used.
    """
    kaggle.api.authenticate()

    # Only pass ``unzip`` when extraction was asked for, so the client's own
    # default applies in the other case.
    extra_options = {"unzip": True} if kaggle_zip_file else {}
    kaggle.api.dataset_download_files(
        kaggle_path, kaggle_path_Name, **extra_options
    )

### General Methods

In [None]:
def get_nulls_data(df):
    """Report, per column of ``df``, how much of the data is missing.

    Used as a quick data-quality check.

    Args:
        df: The DataFrame to inspect.

    Returns:
        A DataFrame indexed by the column names of ``df``, ordered by
        descending number of nulls, with two columns:

        * ``"Total"``: number of null values in the column.
        * ``"PercNotNull"``: 100 minus the null percentage (the null
          percentage is rounded to two decimals before subtracting).
    """
    null_counts = df.isnull().sum().sort_values(ascending=False)
    null_share = (null_counts / len(df) * 100).round(2)

    # ``keys`` labels the two concatenated Series directly as columns.
    return pd.concat(
        [null_counts, 100 - null_share],
        axis=1,
        keys=["Total", "PercNotNull"],
    )

In [None]:
def get_compound_acceptance_index(row):
    """Translate the ``'Sentiment'`` label of ``row`` into a numeric score.

    Args:
        row: Any mapping-like object supporting ``row['Sentiment']``
            (e.g. a DataFrame row).

    Returns:
        ``1`` for 'Extremely Positive', ``0.5`` for 'Positive', ``0`` for
        'Neutral', ``-0.5`` for 'Negative', ``-1`` for 'Extremely Negative',
        and ``0`` for any other value.
    """
    score_by_label = (
        ('Extremely Positive', 1),
        ('Positive', 0.5),
        ('Neutral', 0),
        ('Negative', -0.5),
        ('Extremely Negative', -1),
    )

    sentiment = row['Sentiment']
    # Compare with ``==`` in a fixed order rather than hashing the value.
    for label, score in score_by_label:
        if sentiment == label:
            return score

    # Unrecognised labels are treated like 'Neutral'.
    return 0

In [None]:
def ttest_hypothesis_determination(pval, p_alpha):
    """Print the outcome of a hypothesis test for a given p-value.

    Args:
        pval: The p-value obtained from the test.
        p_alpha: The significance level as a fraction (e.g. ``0.05`` for 5%,
            which corresponds to a confidence of 95%).

    Returns:
        None. One message is printed: the "reject H0" message when
        ``pval < p_alpha``, the "cannot reject H0" message otherwise.
    """
    # Both figures are reported as percentages in the message.
    confidence_perc = 100 * (1 - p_alpha)
    p_alpha_perc = 100 * p_alpha

    reject_h0_msg = "I have enough evidence to reject H0. Therefore, I assume H1 with a confidence of {0}% and significance of {1}%"
    keep_h0_msg = "I don't have enough evidence to reject H0. So we accept is true with a confidence of {0}% and significance of {1}%"

    template = reject_h0_msg if pval < p_alpha else keep_h0_msg
    print(template.format(confidence_perc, p_alpha_perc))

In [None]:
def build_geodf(df, lat_col_name='latitude', lon_col_name='longitude'):
    """Build a GeoDataFrame with point geometry from coordinate columns.

    Args:
        df: Source DataFrame; it is copied, so the caller's object is not
            modified.
        lat_col_name: Name of the column holding the latitude values.
        lon_col_name: Name of the column holding the longitude values.

    Returns:
        A ``gpd.GeoDataFrame`` built from the copy of ``df`` whose geometry
        comes from ``gpd.points_from_xy(longitude, latitude)``.
    """
    frame = df.copy()
    latitudes = frame[lat_col_name]
    longitudes = frame[lon_col_name]

    # points_from_xy takes x (longitude) first, then y (latitude).
    points = gpd.points_from_xy(longitudes, latitudes)
    return gpd.GeoDataFrame(frame, geometry=points)