<a href="https://colab.research.google.com/github/otavio-r-filho/nasa_space_apps/blob/master/notebooks/analise_fatorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NASA Space Apps 2020: COVID-19 Challenge

In [0]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob
%matplotlib inline

plt.style.use("ggplot")

# Package that we may not have
# -----------------------------------------------
# Factory Analyzer
try:
  from factor_analyzer import FactorAnalyzer
  from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
  from factor_analyzer.factor_analyzer import calculate_kmo
except ModuleNotFoundError:
  !pip install factor_analyzer
# -----------------------------------------------

In [0]:
dados_tratados = glob("../data/external/*.csv")

In [221]:
# Google Colab POG
from google.colab import drive
drive.mount('/gdrive')
%cd "/gdrive/My Drive/Hackatons/NASA Space Apps 2020"
dados_tratados = glob("Data/Tratados/*.csv")

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
/gdrive/.shortcut-targets-by-id/148cbL0D25RHhLY_77tnnCNRdM-ul_EOK/NASA Space Apps 2020


## Databases consolidation

### Listing raw datasets

In [222]:
fnames = list(map(os.path.basename, dados_tratados))
wlength = np.max(list(map(len,fnames)))
dados_tratados = dict(zip(fnames, dados_tratados))
dataframes = dict()
for k,v in dados_tratados.items():
  print((k+":").rjust(wlength), v)

                            IDH.csv: Data/Tratados/IDH.csv
REAL_GDP_PER_CAPITA_CONSTANT_US$.csv: Data/Tratados/REAL_GDP_PER_CAPITA_CONSTANT_US$.csv
                   AGUA_TRATADA.csv: Data/Tratados/AGUA_TRATADA.csv
                  BASIC_HYGIENE.csv: Data/Tratados/BASIC_HYGIENE.csv
                      DESASTRES.csv: Data/Tratados/DESASTRES.csv
                  HOSPITAL_BEDS.csv: Data/Tratados/HOSPITAL_BEDS.csv
                   IMMUNIZATION.csv: Data/Tratados/IMMUNIZATION.csv
                    LOCAL_X_USD.csv: Data/Tratados/LOCAL_X_USD.csv
                        MEDICOS.csv: Data/Tratados/MEDICOS.csv
                OPEN_DEFECATION.csv: Data/Tratados/OPEN_DEFECATION.csv
                      POPULACAO.csv: Data/Tratados/POPULACAO.csv
                 REDE_DE_ESGOTO.csv: Data/Tratados/REDE_DE_ESGOTO.csv
                 SALARIO_MINIMO.csv: Data/Tratados/SALARIO_MINIMO.csv
                 IMMUNIZATION_2.csv: Data/Tratados/IMMUNIZATION_2.csv
                 IMMUNIZATION_3.csv: 

#### Number of inhabitants (2010 only)

In [223]:
df_populacao = pd.read_csv(dados_tratados["POPULACAO.csv"])
df_populacao = df_populacao.sort_values(["Year","Country Code"]).drop_duplicates(subset=["Country Code"], keep="last")
df_populacao = df_populacao[["Country Code", "Population"]].set_index("Country Code")
dataframes["population"] = df_populacao
df_populacao.head()

Unnamed: 0_level_0,Population
Country Code,Unnamed: 1_level_1
ARG,40116890.0
BOL,9684295.0
BRA,190747400.0
CHL,17094180.0
COL,48364900.0


#### Human Development Index

In [224]:
df_hdi = pd.read_csv(dados_tratados["IDH.csv"]).rename(columns = {"IDH": "HDI"})
df_hdi = df_hdi.sort_values(["Year", "Country Code"]).drop_duplicates(subset=["Country Code"], keep="last")
df_hdi = df_hdi[["Country Code", "HDI"]].set_index("Country Code")
dataframes["hdi"] = df_hdi
df_hdi.head()

Unnamed: 0_level_0,HDI
Country Code,Unnamed: 1_level_1
ARG,48
BRA,79
CHL,42
COL,79
ECU,85


#### Sewer Network

In [225]:
df_sewer_network = pd.read_csv(dados_tratados["REDE_DE_ESGOTO.csv"]).rename(columns = {"Population using at least basic sanitation services (%)": "Sanitation"})

urban_mask = (df_sewer_network["Type"] == "Urban")
rural_mask = (df_sewer_network["Type"] == "Rural")
total_mask = (df_sewer_network["Type"] == "Total")

df_sewer_network_urban = df_sewer_network.loc[urban_mask, ["Country Code", "Year", "Sanitation"]]
df_sewer_network_urban = df_sewer_network_urban.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_sewer_network_urban = df_sewer_network_urban.rename(columns = {"Sanitation": "Urban Sanitation (%)"})
df_sewer_network_urban = df_sewer_network_urban[["Country Code", "Urban Sanitation (%)"]].set_index("Country Code")

df_sewer_network_rural = df_sewer_network.loc[rural_mask, ["Country Code", "Year", "Sanitation"]]
df_sewer_network_rural = df_sewer_network_rural.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_sewer_network_rural = df_sewer_network_rural.rename(columns = {"Sanitation": "Rural Sanitation (%)"})
df_sewer_network_rural = df_sewer_network_rural[["Country Code", "Rural Sanitation (%)"]].set_index("Country Code")

df_sewer_network_total = df_sewer_network.loc[total_mask, ["Country Code", "Year", "Sanitation"]]
df_sewer_network_total = df_sewer_network_total.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_sewer_network_total = df_sewer_network_total.rename(columns = {"Sanitation": "Total Sanitation (%)"})
df_sewer_network_total = df_sewer_network_total[["Country Code", "Total Sanitation (%)"]].set_index("Country Code")

df_sewer_network = df_sewer_network_urban.join(df_sewer_network_rural)
df_sewer_network = df_sewer_network.join(df_sewer_network_total)

dataframes["sewer_network"] = df_sewer_network
df_sewer_network.head()

Unnamed: 0_level_0,Urban Sanitation (%),Rural Sanitation (%),Total Sanitation (%)
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ARG,96.0,,
BOL,72.0,36.0,61.0
BRA,93.0,60.0,88.0
CHL,100.0,100.0,100.0
COL,93.0,76.0,90.0


#### Open Defecation

In [226]:
df_open_defecation = pd.read_csv(dados_tratados["OPEN_DEFECATION.csv"]).rename(columns = {"Population practising open defecation (%)": "Open Defecation"})

urban_mask = (df_open_defecation["Type"] == "Urban")
rural_mask = (df_open_defecation["Type"] == "Rural")
total_mask = (df_open_defecation["Type"] == "Total")

df_open_defecation_urban = df_open_defecation.loc[urban_mask, ["Country Code", "Year", "Open Defecation"]]
df_open_defecation_urban = df_open_defecation_urban.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_open_defecation_urban = df_open_defecation_urban.rename(columns = {"Open Defecation": "Urban Open Defecation (%)"})
df_open_defecation_urban = df_open_defecation_urban[["Country Code", "Urban Open Defecation (%)"]].set_index("Country Code")

df_open_defecation_rural = df_open_defecation.loc[urban_mask, ["Country Code", "Year", "Open Defecation"]]
df_open_defecation_rural = df_open_defecation_rural.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_open_defecation_rural = df_open_defecation_rural.rename(columns = {"Open Defecation": "Rural Open Defecation (%)"})
df_open_defecation_rural = df_open_defecation_rural[["Country Code", "Rural Open Defecation (%)"]].set_index("Country Code")

df_open_defecation_total = df_open_defecation.loc[urban_mask, ["Country Code", "Year", "Open Defecation"]]
df_open_defecation_total = df_open_defecation_total.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_open_defecation_total = df_open_defecation_total.rename(columns = {"Open Defecation": "Total Open Defecation (%)"})
df_open_defecation_total = df_open_defecation_total[["Country Code", "Total Open Defecation (%)"]].set_index("Country Code")

df_open_defecation = df_open_defecation_urban.join(df_open_defecation_rural)
df_open_defecation = df_open_defecation.join(df_open_defecation_total)

dataframes["open_defecation"] = df_open_defecation
df_open_defecation.head()

Unnamed: 0_level_0,Urban Open Defecation (%),Rural Open Defecation (%),Total Open Defecation (%)
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ARG,2.0,2.0,2.0
BOL,2.0,2.0,2.0
BRA,0.0,0.0,0.0
CHL,0.0,0.0,0.0
COL,1.0,1.0,1.0


#### Clean Water

In [227]:
df_clean_water = pd.read_csv(dados_tratados["AGUA_TRATADA.csv"]).rename(columns = {"Population using at least basic drinking-water services (%)": "Clean Water"})

urban_mask = (df_clean_water["Type"] == "Urban")
rural_mask = (df_clean_water["Type"] == "Rural")
total_mask = (df_clean_water["Type"] == "Total")

df_clean_water_urban = df_clean_water.loc[urban_mask, ["Country Code", "Year", "Clean Water"]]
df_clean_water_urban = df_clean_water_urban.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_clean_water_urban = df_clean_water_urban.rename(columns = {"Clean Water": "Urban Clean Water (%)"})
df_clean_water_urban = df_clean_water_urban[["Country Code", "Urban Clean Water (%)"]].set_index("Country Code")

df_clean_water_rural = df_clean_water.loc[urban_mask, ["Country Code", "Year", "Clean Water"]]
df_clean_water_rural = df_clean_water_rural.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_clean_water_rural = df_clean_water_rural.rename(columns = {"Clean Water": "Rural Clean Water (%)"})
df_clean_water_rural = df_clean_water_rural[["Country Code", "Rural Clean Water (%)"]].set_index("Country Code")

df_clean_water_total = df_clean_water.loc[urban_mask, ["Country Code", "Year", "Clean Water"]]
df_clean_water_total = df_clean_water_total.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_clean_water_total = df_clean_water_total.rename(columns = {"Clean Water": "Total Clean Water (%)"})
df_clean_water_total = df_clean_water_total[["Country Code", "Total Clean Water (%)"]].set_index("Country Code")

df_clean_water = df_clean_water_urban.join(df_clean_water_rural)
df_clean_water = df_clean_water.join(df_clean_water_total)

dataframes["clean_water"] = df_clean_water
df_clean_water.head()

Unnamed: 0_level_0,Urban Clean Water (%),Rural Clean Water (%),Total Clean Water (%)
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ARG,100.0,100.0,100.0
BOL,99.0,99.0,99.0
BRA,100.0,100.0,100.0
CHL,100.0,100.0,100.0
COL,100.0,100.0,100.0


#### Basic Hygiene

In [228]:
df_basic_hygiene = pd.read_csv(dados_tratados["BASIC_HYGIENE.csv"]).rename(columns = {"Population with basic handwashing facilities at home (%)": "Basic Hygiene"})

urban_mask = (df_basic_hygiene["Type"] == "Urban")
rural_mask = (df_basic_hygiene["Type"] == "Rural")
total_mask = (df_basic_hygiene["Type"] == "Total")

df_basic_hygiene_urban = df_basic_hygiene.loc[urban_mask, ["Country Code", "Year", "Basic Hygiene"]]
df_basic_hygiene_urban = df_basic_hygiene_urban.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_basic_hygiene_urban = df_basic_hygiene_urban.rename(columns = {"Basic Hygiene": "Urban Basic Hygiene (%)"})
df_basic_hygiene_urban = df_basic_hygiene_urban[["Country Code", "Urban Basic Hygiene (%)"]].set_index("Country Code")

df_basic_hygiene_rural = df_basic_hygiene.loc[urban_mask, ["Country Code", "Year", "Basic Hygiene"]]
df_basic_hygiene_rural = df_basic_hygiene_rural.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_basic_hygiene_rural = df_basic_hygiene_rural.rename(columns = {"Basic Hygiene": "Rural Basic Hygiene (%)"})
df_basic_hygiene_rural = df_basic_hygiene_rural[["Country Code", "Rural Basic Hygiene (%)"]].set_index("Country Code")

df_basic_hygiene_total = df_basic_hygiene.loc[urban_mask, ["Country Code", "Year", "Basic Hygiene"]]
df_basic_hygiene_total = df_basic_hygiene_total.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep = "last")
df_basic_hygiene_total = df_basic_hygiene_total.rename(columns = {"Basic Hygiene": "Total Basic Hygiene (%)"})
df_basic_hygiene_total = df_basic_hygiene_total[["Country Code", "Total Basic Hygiene (%)"]].set_index("Country Code")

df_basic_hygiene = df_basic_hygiene_urban.join(df_basic_hygiene_rural)
df_basic_hygiene = df_basic_hygiene.join(df_basic_hygiene_total)

dataframes["basic_hygiene"] = df_basic_hygiene
df_basic_hygiene.head()

Unnamed: 0_level_0,Urban Basic Hygiene (%),Rural Basic Hygiene (%),Total Basic Hygiene (%)
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BOL,28.0,28.0,28.0
COL,73.0,73.0,73.0
ECU,84.0,84.0,84.0
MEX,90.0,90.0,90.0
PER,,,


#### Disasters

In [229]:
df_disasters = pd.read_csv(dados_tratados["DESASTRES.csv"]).sort_values(["Country Code", "Year"], ignore_index = True)
countries = df_disasters["Country Code"].unique()
df_disasters = df_disasters.dropna()

df_disasters_aux = pd.DataFrame()

for crt in countries:
  country_mask = (df_disasters["Country Code"] == crt)
  years = np.sort(df_disasters.loc[country_mask, "Year"].values)[-2:]
  year_mask = df_disasters["Year"].isin(years)

  df_disasters_aux = pd.concat([df_disasters_aux,df_disasters.loc[country_mask & year_mask, ["Country Code", "Total Affected"]]])

df_disasters = df_disasters_aux.groupby("Country Code").sum()
dataframes["disasters"] = df_disasters
df_disasters

Unnamed: 0_level_0,Total Affected
Country Code,Unnamed: 1_level_1
ARG,37652.0
BOL,3021.0
BRA,30143.0
CHL,1549.0
COL,79648.0
ECU,421.0
MEX,1297.0
PER,53.0
SLV,16758.0
VEN,1033.0


#### Hospital Beds

In [230]:
df_hospital_beds = pd.read_csv(dados_tratados["HOSPITAL_BEDS.csv"])
df_hospital_beds = df_hospital_beds.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep='last')
df_hospital_beds = df_hospital_beds[["Country Code", "Hospital Beds"]].set_index("Country Code")
dataframes["hospital_beds"] = df_hospital_beds
df_hospital_beds.head()

Unnamed: 0_level_0,Hospital Beds
Country Code,Unnamed: 1_level_1
ARG,5.0
BOL,1.1
BRA,2.2
CHL,2.2
COL,1.5


#### Immunization

In [231]:
df_immunization = pd.read_csv(dados_tratados["IMMUNIZATION_3.csv"])
df_immunization = df_immunization.set_index("Country Code")

dataframes["immunization"] = df_immunization
df_immunization.head()

Unnamed: 0_level_0,BCG,HepB3,IPV1,Hib3,RCV1,Rota1,DTP,PCV,MCV,Mean Coverage Percentage
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ARG,93.0,86.0,90.0,86.0,94.0,88.0,91.0,92.0,99.0,91.0
BOL,90.0,83.0,89.0,83.0,89.0,88.0,89.0,87.0,89.0,87.444444
BRA,98.0,93.0,88.0,83.0,92.0,90.0,87.0,87.0,92.0,90.0
CHL,96.0,95.0,98.0,95.0,93.0,,99.0,98.0,93.0,95.875
COL,89.0,92.0,93.0,92.0,95.0,89.0,92.0,94.0,95.0,92.333333


#### Physicians per 10,000 inhabitants

In [232]:
df_physicians = pd.read_csv(dados_tratados["MEDICOS.csv"]).rename(columns = {"First Tooltip": "Physicians per 10,000"})
df_physicians = df_physicians.sort_values(["Country Code", "Year"]).drop_duplicates(subset="Country Code", keep="last")
df_physicians = df_physicians[["Country Code", "Physicians per 10,000"]].set_index("Country Code")

dataframes["physicians"] = df_physicians
df_physicians.head()

Unnamed: 0_level_0,"Physicians per 10,000"
Country Code,Unnamed: 1_level_1
ARG,39.9
BOL,15.9
BRA,21.64
CHL,25.91
COL,21.85


#### Minimum Wage

In [233]:
df_minimum_wage = pd.read_csv(dados_tratados["SALARIO_MINIMO.csv"]).rename(columns = {"Minimum Wage USD": "Minimum Wage (USD)"})
df_minimum_wage = df_minimum_wage.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep="last")
df_minimum_wage = df_minimum_wage[["Country Code", "Minimum Wage (USD)"]].set_index("Country Code")

dataframes["minimum_wage"] = df_minimum_wage
df_minimum_wage.head()

Unnamed: 0_level_0,Minimum Wage (USD)
Country Code,Unnamed: 1_level_1
ARG,454.8
BOL,291.6
BRA,257.2
CHL,429.6
COL,252.9


#### Currency Value

In [234]:
df_currency_value = pd.read_csv(dados_tratados["LOCAL_X_USD.csv"]).rename(columns = {"Price": "Currency Value (USD)"})
df_currency_value = df_currency_value.sort_values(["Country Code", "Date"]).drop_duplicates(subset = "Country Code", keep="last")
df_currency_value = df_currency_value[["Country Code", "Currency Value (USD)"]].set_index("Country Code")

dataframes["currency_value"] = df_currency_value
df_currency_value.head()

Unnamed: 0_level_0,Currency Value (USD)
Country Code,Unnamed: 1_level_1
ARG,0.0146
BOL,0.1448
BRA,0.1874
CHL,0.001251
COL,0.000268


#### GDP per Capta

In [235]:
df_gdp_capta = pd.read_csv(dados_tratados["REAL_GDP_PER_CAPITA_CONSTANT_US$.csv"]).rename(columns = {"GDP": "GDP (USD)"})
df_gdp_capta = df_gdp_capta.sort_values(["Country Code", "Year"]).drop_duplicates(subset = "Country Code", keep="last")
df_gdp_capta = df_gdp_capta[["Country Code", "GDP (USD)"]].set_index("Country Code")

dataframes["gdp_capta"] = df_gdp_capta
df_gdp_capta.head()

Unnamed: 0_level_0,GDP (USD)
Country Code,Unnamed: 1_level_1
ARG,10043.508552
BOL,2559.511317
BRA,11079.709675
CHL,15130.154322
COL,7691.745564


#### Saving Latin America Dataset

In [236]:
df_latin_america = None
for df_name, df in dataframes.items():
  print("Adding", df_name)
  if df_latin_america is None:
    df_latin_america = df.copy()
  else:
    df_latin_america = df_latin_america.join(df)

df_latin_america.to_csv("Data/Tratados/LATIN_AMERICA.csv", index = False)
df_latin_america.head()

Adding population
Adding hdi
Adding sewer_network
Adding open_defecation
Adding clean_water
Adding basic_hygiene
Adding disasters
Adding hospital_beds
Adding immunization
Adding physicians
Adding minimum_wage
Adding currency_value
Adding gdp_capta


Unnamed: 0_level_0,Population,HDI,Urban Sanitation (%),Rural Sanitation (%),Total Sanitation (%),Urban Open Defecation (%),Rural Open Defecation (%),Total Open Defecation (%),Urban Clean Water (%),Rural Clean Water (%),Total Clean Water (%),Urban Basic Hygiene (%),Rural Basic Hygiene (%),Total Basic Hygiene (%),Total Affected,Hospital Beds,BCG,HepB3,IPV1,Hib3,RCV1,Rota1,DTP,PCV,MCV,Mean Coverage Percentage,"Physicians per 10,000",Minimum Wage (USD),Currency Value (USD),GDP (USD)
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
ARG,40116890.0,48.0,96.0,,,2.0,2.0,2.0,100.0,100.0,100.0,,,,37652.0,5.0,93.0,86.0,90.0,86.0,94.0,88.0,91.0,92.0,99.0,91.0,39.9,454.8,0.0146,10043.508552
BOL,9684295.0,,72.0,36.0,61.0,2.0,2.0,2.0,99.0,99.0,99.0,28.0,28.0,28.0,3021.0,1.1,90.0,83.0,89.0,83.0,89.0,88.0,89.0,87.0,89.0,87.444444,15.9,291.6,0.1448,2559.511317
BRA,190747400.0,79.0,93.0,60.0,88.0,0.0,0.0,0.0,100.0,100.0,100.0,,,,30143.0,2.2,98.0,93.0,88.0,83.0,92.0,90.0,87.0,87.0,92.0,90.0,21.64,257.2,0.1874,11079.709675
CHL,17094180.0,42.0,100.0,100.0,100.0,0.0,0.0,0.0,100.0,100.0,100.0,,,,1549.0,2.2,96.0,95.0,98.0,95.0,93.0,,99.0,98.0,93.0,95.875,25.91,429.6,0.001251,15130.154322
COL,48364900.0,79.0,93.0,76.0,90.0,1.0,1.0,1.0,100.0,100.0,100.0,73.0,73.0,73.0,79648.0,1.5,89.0,92.0,93.0,92.0,95.0,89.0,92.0,94.0,95.0,92.333333,21.85,252.9,0.000268,7691.745564


## Factor Analysis.

<img src="https://github.com/otavio-r-filho/nasa_space_apps/blob/master/reports/figures/poverty_dimensions.jpeg?raw=1" width=40%/>

Importante considerations:
1. There are no outliers in data.
2. Sample size should be greater than the factor.
3. There should not be perfect multicollinearity.
4. There should not be homoscedasticity between the variables.