In [13]:
import yfinance as yf
import pandas as pd
import numpy as np 
import warnings
warnings.filterwarnings('ignore')


from data_downloader import DataDownloader
from var_model import VAR
from dice_model import DiceModel

In [14]:
# Helper object used below to fetch the asset and benchmark price tables
downloader = DataDownloader()

# Tickers to analyse, the benchmark index symbol and the sample window
assets = [
    'AAPL', 'IBM', 'TSLA', 'GOOG', 'NVDA',
    'NEE', 'FSLR', 'VWDRY', 'BEP',
]
benchmark = '^GSPC'
start_date = '2014-12-01'
end_date = '2024-05-31'
rf = 0.065  # presumably the annual risk-free rate (6.5%) — TODO confirm against its consumers

# One call returns both price tables for the requested window
asset_prices, benchmark_prices = downloader.download_data(
    start_date=start_date,
    end_date=end_date,
    assets=assets,
    benchmark=benchmark,
)

[*********************100%%**********************]  9 of 9 completed
[*********************100%%**********************]  1 of 1 completed


In [15]:
# Preview the first rows of the downloaded asset price table (one column per ticker)
asset_prices.head()

Unnamed: 0_level_0,AAPL,BEP,FSLR,GOOG,IBM,NEE,NVDA,TSLA,VWDRY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-12-01,25.653629,10.002732,45.759998,26.551716,101.12606,20.429737,0.493978,15.442667,2.201333
2014-12-02,25.555542,10.080501,46.419998,26.549229,101.833466,20.488216,0.494698,15.428667,2.266397
2014-12-03,25.845358,10.216591,47.119999,26.42836,102.991615,20.347864,0.50742,15.286667,2.259167
2014-12-04,25.747269,10.167987,46.720001,26.726309,102.697372,20.172443,0.502859,15.218667,2.289892
2014-12-05,25.638029,9.908765,46.91,26.126928,102.209084,20.063286,0.505739,14.914,2.371222


In [16]:
# Load the CSV files
temperature = pd.read_csv('Data/Temperature.csv')
drought = pd.read_csv('Data/Drought_Severity.csv')

# Source column -> short name for the CO2 series that are kept.
# The key order also fixes the column order of the resulting frame.
co2_column_names = {
    "Month": "Date",
    "Coal, Including Coal Coke Net Imports, CO2 Emissions": "Coal",
    "Natural Gas, Excluding Supplemental Gaseous Fuels, CO2 Emissions": "Natural Gas",
    "Petroleum, Excluding Biofuels, CO2 Emissions": "Petroleum",
    "Total Energy CO2 Emissions": "Total CO2 Emissions",
}

# Load the XLSX file: skip the first 10 sheet rows, drop the row labelled 0,
# keep only the columns of interest, rename them, and index by a datetime 'Date'.
co2_emission = (
    pd.read_excel('Data/CO2_Emissions.xlsx', skiprows=10)
    .drop(index=0)
    [list(co2_column_names)]
    .rename(columns=co2_column_names)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

# Percentage change over 12 rows (year-over-year for monthly data) for each
# CO2 series; the leading rows without a 12-row-back value are dropped.
co2_emission_pct_change = co2_emission.pct_change(periods=12).dropna()

# From here on `co2_emission` refers to the year-over-year changes
co2_emission = co2_emission_pct_change
co2_emission

Unnamed: 0_level_0,Coal,Natural Gas,Petroleum,Total CO2 Emissions
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1974-01-01,-0.010579,-0.046322,-0.081750,-0.054783
1974-02-01,-0.008044,-0.032009,-0.098440,-0.058713
1974-03-01,0.010560,-0.003760,-0.076896,-0.036424
1974-04-01,0.003739,-0.074031,-0.001288,-0.019331
1974-05-01,0.024103,-0.066917,-0.063255,-0.041983
...,...,...,...,...
2024-01-01,0.167510,0.120100,0.010273,0.081480
2024-02-01,-0.041299,0.006871,0.007048,0.000088
2024-03-01,-0.210958,-0.074042,-0.042391,-0.078618
2024-04-01,-0.072617,-0.010663,0.001523,-0.012618


In [17]:
# Function to drop one or multiple columns from a dataframe
def drop_columns(df, columns):
    """Return a new dataframe without the given column(s).

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    columns : label or list of labels
        Column name(s) to remove.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``columns`` removed.

    Raises
    ------
    KeyError
        If any requested label is not a column of ``df``.
    """
    return df.drop(labels=columns, axis=1)

# Function to rename columns in a dataframe
def rename_columns(df, columns):
    """Return a new dataframe with its columns renamed.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    columns : dict-like or callable
        Mapping (or function) from old column labels to new ones. Labels
        that do not occur in ``df`` are ignored.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` carrying the new column labels.
    """
    return df.rename(mapper=columns, axis='columns')

# Drop the 'Average surface temperature.1', 'Code', 'Entity' and 'year' columns
# from the temperature dataframe
temperature = drop_columns(temperature, ['Average surface temperature.1','Code', 'Entity', 'year'])

# Rename the columns in the temperature dataframe
temperature = rename_columns(temperature, {'Day': 'Date',
                             'Average surface temperature': 'Temperature'})

# Parse day/month/two-digit-year strings; values that do not match become NaT.
temperature_dates = pd.to_datetime(temperature['Date'], format='%d/%m/%y', errors='coerce')

# '%y' resolves two-digit years into a window that reaches past the present,
# so older observations come back one century too late. Anything parsed as
# 2025 or later is shifted back by 100 years (NaT compares False and is left alone).
parsed_in_future = temperature_dates.dt.year >= 2025
temperature_dates = temperature_dates.mask(
    parsed_in_future, temperature_dates - pd.DateOffset(years=100)
)
temperature['Date'] = temperature_dates

# Keep the 1940-2024 window (this also discards NaT rows). The explicit copy
# makes the filtered frame independent, so the assignment below does not write
# into a slice of the previous frame.
in_sample = (temperature['Date'] >= '1940-01-01') & (temperature['Date'] <= '2024-12-31')
temperature = temperature.loc[in_sample].copy()

# Snap every date to the first day of its month
temperature['Date'] = temperature['Date'].dt.to_period('M').dt.to_timestamp()

# Percentage change over 12 rows for the numeric columns only ('Date' excluded),
# then re-attach 'Date' and drop the rows left without a value by the shift.
temperature_pct = (
    temperature
    .drop(columns=['Date'])
    .pct_change(periods=12)
    .assign(Date=temperature['Date'])
    .dropna()
)

# From here on `temperature` refers to the 12-row percentage changes
temperature = temperature_pct

In [None]:
# Remove identifier columns that are not needed (absent columns are skipped)
drought = drought.drop(columns=['Code', 'Name'], errors='ignore')

drought = rename_columns(drought, {'MapDate': 'Date'})
drought['Date'] = pd.to_datetime(drought['Date'], format='%Y%m%d')

# Extract the year and month used as grouping keys
drought['Year'] = drought['Date'].dt.year
drought['Month'] = drought['Date'].dt.month

# Make sure 'DSCI' is numeric; non-numeric entries become NaN and are skipped
# by the mean below
drought['DSCI'] = pd.to_numeric(drought['DSCI'], errors='coerce')

# Group by year and month and compute the average DSCI
monthly_avg = drought.groupby(['Year', 'Month'])['DSCI'].mean().reset_index()

# Stamp every monthly average with the first day of its month, built directly
# as datetime64 so no string round-trip (or later re-parsing) is required
monthly_avg['Date'] = pd.to_datetime(
    {'year': monthly_avg['Year'], 'month': monthly_avg['Month'], 'day': 1}
)

# Keep only the merge key and the value; copy so that later column assignments
# on `drought` do not write into a slice of `monthly_avg`
drought = monthly_avg[['Date', 'DSCI']].copy()

drought

In [None]:
def _with_datetime_date_column(df):
    """Return a copy of ``df`` whose 'Date' is a datetime64 column.

    If 'Date' is the index name rather than a column, it is moved back into
    the columns first. Values that cannot be parsed become NaT.

    Raises
    ------
    KeyError
        If ``df`` has neither a 'Date' column nor an index named 'Date'.
    """
    if 'Date' in df.columns:
        dated = df.copy()
    elif df.index.name == 'Date':
        dated = df.reset_index()
    else:
        raise KeyError("expected a 'Date' column or an index named 'Date'")
    dated['Date'] = pd.to_datetime(dated['Date'], errors='coerce')
    return dated


# Make sure 'Date' is a datetime column in every dataframe. The CO2 frame
# carries 'Date' as its index, so indexing it with ['Date'] directly would
# raise a KeyError; the helper handles both layouts.
temperature = _with_datetime_date_column(temperature)
drought = _with_datetime_date_column(drought)
co2_emission = _with_datetime_date_column(co2_emission)

# Inner-join the three dataframes on 'Date' (only dates present in all are kept)
data = pd.merge(temperature, drought, on='Date', how='inner')
data = pd.merge(data, co2_emission, on='Date', how='inner')

data

In [None]:
# Function to calculate the monthly returns of a dataframe
def calculate_monthly_returns(df):
    """Compute month-over-month percentage changes from a dated dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Either indexed by datetimes or carrying a 'Date' column, which is
        then used as the index.

    Returns
    -------
    pandas.DataFrame
        One row per calendar month (labelled with the month-end date)
        holding the percentage change of the last observation of that month
        versus the last observation of the previous month. Rows containing
        any NaN, including the first month, are dropped.
    """
    if 'Date' in df.columns:
        df = df.set_index('Date')

    # Month-end bins expressed as an offset object rather than the 'M' string
    # alias: the alias was renamed to 'ME' in newer pandas releases, while the
    # offset object is accepted by old and new versions alike.
    month_end_values = df.resample(pd.offsets.MonthEnd()).last()

    # Percentage change between consecutive month-end values
    return month_end_values.pct_change().dropna()

# Monthly returns of the benchmark (kept indexed by month-end date)
benchmark_returns = calculate_monthly_returns(benchmark_prices)

# Monthly returns of the assets, with the date index turned back into a
# 'Date' column and every date moved to the 1st of its month
asset_returns = (
    calculate_monthly_returns(asset_prices)
    .reset_index()
    .assign(Date=lambda frame: frame['Date'].map(lambda stamp: stamp.replace(day=1)))
)

# Display the first rows of the asset returns
asset_returns.head()


In [None]:
# Run the merged climate frame through the same month-end resample +
# percentage-change helper used for the asset prices.
# NOTE(review): the Temperature and CO2 columns of `data` already hold
# 12-row percentage changes, so this takes a percentage change of a
# percentage change (and a plain percentage change of DSCI) — confirm that
# this is the intended transformation.
data_pct = calculate_monthly_returns(data)
data_pct


In [None]:
# Plot the monthly return series of every ticker in `assets` against the 'Date' column
asset_returns.plot(x='Date', y=assets, figsize=(14, 7), title='Asset Returns')