In [21]:
import yfinance as yf
import pandas as pd
import numpy as np 
import warnings
warnings.filterwarnings('ignore')


from data_downloader import DataDownloader
from var_model import VAR
from dice_model import DiceModel

In [22]:
# Downloader object imported from the project-local data_downloader module.
downloader = DataDownloader()

# Tickers to analyse and the ticker used as benchmark.
assets = ['AAPL', 'IBM', 'TSLA', 'GOOG', 'NVDA'] 
benchmark = '^GSPC'  
# Sample window, passed to the downloader as 'YYYY-MM-DD' strings.
start_date = '2019-01-01'
end_date = '2023-12-31'
# NOTE(review): presumably an annual risk-free rate of 6.5%; it is not used in this
# cell — confirm its meaning and source where it is consumed.
rf = .065

# download_data returns two objects: one for `assets` and one for `benchmark`.
asset_prices, benchmark_prices = downloader.download_data(start_date=start_date, end_date=end_date,
                                                                      assets=assets, benchmark=benchmark)

[*********************100%%**********************]  5 of 5 completed
[*********************100%%**********************]  1 of 1 completed


In [23]:
# Preview the first rows of the downloaded asset data (head() defaults to 5 rows).
asset_prices.head()

Unnamed: 0_level_0,AAPL,GOOG,IBM,NVDA,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-02,37.75008,52.164391,83.947762,3.378323,20.674667
2019-01-03,33.989887,50.678543,82.271851,3.174214,20.024
2019-01-04,35.440899,53.404346,85.485207,3.377579,21.179333
2019-01-07,35.362015,53.288631,86.089989,3.556391,22.330667
2019-01-08,36.036129,53.682163,87.314125,3.467853,22.356667


In [24]:
# Load the CSV files
temperature = pd.read_csv('Data/Temperature.csv')
drought = pd.read_csv('Data/Drought_Severity.csv')

# Columns kept from the CO2 workbook, mapped to the short names used from here on.
# The insertion order of this mapping is the column order of the resulting frame.
co2_column_names = {
    "Month": "Date",
    "Coal, Including Coal Coke Net Imports, CO2 Emissions": "Coal",
    "Natural Gas, Excluding Supplemental Gaseous Fuels, CO2 Emissions": "Natural Gas",
    "Petroleum, Excluding Biofuels, CO2 Emissions": "Petroleum",
    "Total Energy CO2 Emissions": "Total CO2 Emissions",
}

# Load the XLSX file: skip the first 10 rows of the sheet, discard the row labelled 0
# (presumably a units/sub-header row — TODO confirm), keep the mapped columns and rename them.
co2_emission = (
    pd.read_excel('Data/CO2_Emissions.xlsx', skiprows=10)
    .drop(index=0)
    .loc[:, list(co2_column_names)]
    .rename(columns=co2_column_names)
)

co2_emission

Unnamed: 0,Date,Coal,Natural Gas,Petroleum,Total CO2 Emissions
1,1973-01-01,109.552,125.557,218.972,454.082
2,1973-02-01,98.833,113.75,202.519,415.102
3,1973-03-01,98.483,107.7,201.245,407.428
4,1973-04-01,94.15,97.838,176.222,368.21
5,1973-05-01,95.424,92.039,188.57,376.032
...,...,...,...,...,...
613,2024-01-01,83.031,200.228,183.995,467.886
614,2024-02-01,53.298,160.596,173.319,387.805
615,2024-03-01,46.63,150.845,185.759,383.866
616,2024-04-01,44.366,129.155,184.741,358.875


In [25]:
# Helper reporting the dimensions of a dataframe
def count_rows_columns(df):
    """Return the number of rows and the number of columns of ``df``.

    Parameters
    ----------
    df : object with a ``shape`` attribute of at least two entries
        (for example a pandas DataFrame).

    Returns
    -------
    tuple
        ``(rows, columns)``, read from ``df.shape[0]`` and ``df.shape[1]``.
    """
    dimensions = df.shape
    return dimensions[0], dimensions[1]

# Count rows and columns in the CO2 emissions dataframe.
# The frame measured here is `co2_emission`, so the message names it accordingly
# (it previously said "Temperature", which mislabelled the reported shape).
co2_rows, co2_columns = count_rows_columns(co2_emission)
print(f'CO2 emissions dataframe has {co2_rows} rows and {co2_columns} columns')

Temperature dataframe has 617 rows and 5 columns


In [26]:
# Preview the first rows of the temperature data as loaded from the CSV.
temperature.head()

Unnamed: 0,Entity,Code,year,Day,Average surface temperature,Average surface temperature.1
0,United States,USA,1940,15/01/40,-7.278521,8.045168
1,United States,USA,1940,15/02/40,-3.639929,8.045168
2,United States,USA,1940,15/03/40,0.422743,8.045168
3,United States,USA,1940,15/04/40,6.75601,8.045168
4,United States,USA,1940,15/05/40,13.282525,8.045168


In [27]:
# Helper that removes one or several columns from a dataframe
def drop_columns(df, columns):
    """Return a copy of ``df`` without the given column(s).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to take the columns from; it is not modified.
    columns : label or list of labels
        Column name(s) to remove, forwarded to ``DataFrame.drop(columns=...)``.

    Returns
    -------
    pandas.DataFrame
        New frame with the remaining columns.
    """
    return df.drop(columns=columns)

# Drop the 'Average surface temperature.1', 'Code', 'Entity' and 'year' columns from the
# temperature dataframe; the row index is left as it is.
# The result is rebound to `temperature`, so re-running this cell on its own raises a
# KeyError because the columns are already gone.
temperature = drop_columns(temperature, ['Average surface temperature.1','Code', 'Entity', 'year'])
temperature.head()



Unnamed: 0,Day,Average surface temperature
0,15/01/40,-7.278521
1,15/02/40,-3.639929
2,15/03/40,0.422743
3,15/04/40,6.75601
4,15/05/40,13.282525


In [28]:
# Helper that renames columns of a dataframe
def rename_columns(df, columns):
    """Return a copy of ``df`` with columns renamed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are renamed; it is not modified.
    columns : dict or callable
        Old-name to new-name mapping, forwarded to ``DataFrame.rename(columns=...)``.

    Returns
    -------
    pandas.DataFrame
        New frame carrying the renamed columns.
    """
    return df.rename(columns=columns)

# Rename 'Average surface temperature' to the shorter 'Temperature'.
# NOTE(review): 'Day' still appears to hold dd/mm/yy text (e.g. '15/01/40'); two-digit
# years need care when it is parsed — confirm how later cells convert it.
temperature = rename_columns(temperature, {'Average surface temperature': 'Temperature'})
temperature.head()

Unnamed: 0,Day,Temperature
0,15/01/40,-7.278521
1,15/02/40,-3.639929
2,15/03/40,0.422743
3,15/04/40,6.75601
4,15/05/40,13.282525


In [29]:
# Display the first few rows of the drought dataset to understand its structure.
# Only the last expression of a cell is rendered, so a bare `drought.head()` here was
# evaluated and thrown away; `display` (available as a builtin in IPython/Jupyter
# kernels) shows it explicitly.
display(drought.head())

# Check for missing values in the drought dataset
missing_values = drought.isnull().sum()
print(f'Missing values in each column:\n{missing_values}')

# Drop the columns that are not needed ('Code' and 'Name').
# errors='ignore' skips labels that are absent, which replaces the two
# `if ... in drought.columns` guards and keeps the cell safe to re-run.
drought = drought.drop(columns=['Code', 'Name'], errors='ignore')

# Helper that renames a single column of a dataframe
def change_column_name(df, old_name, new_name):
    """Return a copy of ``df`` in which column ``old_name`` is called ``new_name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column; it is not modified.
    old_name : label
        Current column name.
    new_name : label
        Replacement column name.

    Returns
    -------
    pandas.DataFrame
        New frame with the column renamed.
    """
    mapping = {old_name: new_name}
    return df.rename(columns=mapping)

# Rename 'MapDate' to 'Date' in the drought dataset, then parse that column as
# datetimes using the '%Y%m%d' layout; the column keeps its position in the frame.
drought = change_column_name(drought, 'MapDate', 'Date').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y%m%d')
)

# Show the first few rows to verify the changes
drought.head()



Missing values in each column:
Name       0
MapDate    0
DSCI       0
dtype: int64


Unnamed: 0,Date,DSCI
0,2014-12-30,104
1,2015-01-06,100
2,2015-01-13,101
3,2015-01-20,103
4,2015-01-27,109
