# Introduction
Dans ce notebook, on cherche à calculer le rendement (return) du NASDAQ 100 entre deux dates au choix, 
afin de pouvoir comparer la performance des entreprises à celle du marché en fonction de leurs dates de publication des résultats.  

On récupère donc les données OHLC de l'ETF QQQ, qui réplique l'indice NASDAQ 100, puis on crée une matrice qui contient les rendements de l'indice pour toutes les paires de dates de cotation (date de départ, date cible) de 2020 à 2024.

# Récupération des données

In [3]:
import requests
from datetime import datetime
from kaggle_secrets import UserSecretsClient
import pandas as pd
import time

# Read the API key stored under the secret name "FPM_KEY" through the Kaggle
# secrets client instead of hard-coding it in the notebook; `api_key` is later
# passed to get_historical_price.
user_secrets = UserSecretsClient()
api_key = user_secrets.get_secret("FPM_KEY")

# On utilise la fonction écrite par Samuel
def get_historical_price(symbol, from_date, to_date, api_key, timeout=30):
    """
    Fetch the historical daily prices of a symbol from Financial Modeling Prep.

    Args:
        symbol (str): Ticker of the stock or ETF.
        from_date (str): Start date, formatted YYYY-MM-DD.
        to_date (str): End date, formatted YYYY-MM-DD.
        api_key (str): Financial Modeling Prep API key.
        timeout (float): Seconds to wait for the server before giving up
            (defaults to 30), so a stalled connection cannot hang the notebook.

    Returns:
        pandas.DataFrame | None: The rows of the "historical" payload, with
        'date' converted to datetime and sorted in ascending order ('date'
        stays a regular column, the index is left untouched). None when the
        HTTP status is not 200 or when the payload holds no historical rows.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts) or
        by ``response.json()`` are not caught and propagate to the caller.
    """
    base_url = "https://financialmodelingprep.com/api/v3/historical-price-full/"
    url = f"{base_url}{symbol}"
    # Let requests build and encode the query string instead of interpolating it by hand
    query = {"from": from_date, "to": to_date, "apikey": api_key}

    response = requests.get(url, params=query, timeout=timeout)

    if response.status_code != 200:  # Anything other than a successful request
        print(f"Erreur de requête pour {symbol}: {response.status_code}")
        return None

    data = response.json()
    # The payload is expected to be a dict with a "historical" list; anything
    # else (missing key, empty list, non-dict body) is treated as "no data"
    historical_data = data.get("historical") if isinstance(data, dict) else None
    if not historical_data:
        print(f"Pas de données historiques pour {symbol}")
        return None

    df = pd.DataFrame(historical_data)
    # Convert the 'date' column to datetime, then sort chronologically
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')
    return df

In [4]:
# Index ticker, kept in a list so the loop below also works for several symbols
tickers = ['QQQ']

# Request parameters: date range of the price history
from_date = "2020-01-01"
to_date = "2024-12-31"

# Successfully fetched frames are collected here and concatenated once after
# the loop (avoids re-copying the accumulated frame on every iteration)
fetched_frames = []

# Fetch the data for each ticker
for ticker in tickers:
    print(f"\nRécupération des données pour {ticker}...")
    df = get_historical_price(ticker, from_date, to_date, api_key)

    if df is not None:
        # Tag the rows only after the None check: get_historical_price returns
        # None on failure, and assigning a column on None raises TypeError
        df['symbol'] = ticker
        fetched_frames.append(df)
    else:
        print(f"✗ Échec de récupération des données pour {ticker}")

    # Pause between requests to avoid overloading the API
    time.sleep(1)

# Single concatenation; fall back to an empty frame when every request failed
df_price = pd.concat(fetched_frames, axis=0) if fetched_frames else pd.DataFrame()

print("\nTraitement terminé pour tous les tickers.")


Récupération des données pour QQQ...

Traitement terminé pour tous les tickers.


In [22]:
# Inspect the fetched prices; `df` is the frame left over from the last
# iteration of the fetch loop
df

Unnamed: 0,date,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,label,changeOverTime,symbol
1257,2020-01-02,214.40,216.16,213.98,216.16,209.02,31014904,31014904,1.76,0.820900,215.1750,"January 02, 20",0.008209,QQQ
1256,2020-01-03,213.29,215.47,213.28,214.18,207.11,27532657,27532657,0.89,0.417270,214.0550,"January 03, 20",0.004173,QQQ
1255,2020-01-06,212.49,215.59,212.24,215.56,208.44,21726989,21726989,3.07,1.440000,213.9700,"January 06, 20",0.014400,QQQ
1254,2020-01-07,215.64,216.14,214.85,215.53,208.41,23120994,23120994,-0.11,-0.051011,215.5400,"January 07, 20",-0.000510,QQQ
1253,2020-01-08,215.47,218.14,215.16,217.15,209.98,26464212,26464212,1.68,0.779690,216.4800,"January 08, 20",0.007797,QQQ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,2024-12-24,524.83,530.05,524.19,529.96,529.17,17558215,17558215,5.13,0.977460,527.2575,"December 24, 24",0.009775,QQQ
3,2024-12-26,528.32,531.24,526.31,529.60,528.81,19141494,19141494,1.28,0.242280,528.8675,"December 26, 24",0.002423,QQQ
2,2024-12-27,526.01,526.45,517.86,522.56,521.78,33839600,33839600,-3.45,-0.655880,523.2200,"December 27, 24",-0.006559,QQQ
1,2024-12-30,515.51,519.36,511.83,515.61,514.84,34584028,34584028,0.10,0.019398,515.5775,"December 30, 24",0.000194,QQQ


# Cleaning

In [24]:
# Keep only the columns used to build the return matrix
kept_columns = ['close', 'date', 'symbol']
setup_df = df.loc[:, kept_columns]
display(setup_df)

# Count the missing values per column to check that nothing needs cleaning
missing_per_column = setup_df.isna().sum()
display(missing_per_column)

Unnamed: 0,close,date,symbol
1257,216.16,2020-01-02,QQQ
1256,214.18,2020-01-03,QQQ
1255,215.56,2020-01-06,QQQ
1254,215.53,2020-01-07,QQQ
1253,217.15,2020-01-08,QQQ
...,...,...,...
4,529.96,2024-12-24,QQQ
3,529.60,2024-12-26,QQQ
2,522.56,2024-12-27,QQQ
1,515.61,2024-12-30,QQQ


close     0
date      0
symbol    0
dtype: int64

# Création de la matrice

In [14]:
# Move 'date' into the index so that it labels both axes of the matrix.
# Guarded so the cell can be re-run: once 'date' is the index it is no longer
# a column, and calling set_index('date') again would raise KeyError.
if 'date' in setup_df.columns:
    setup_df = setup_df.set_index('date')
close_prices = setup_df['close']
p = close_prices.values

# Pairwise returns in percent. p[:, None] is a column vector, so broadcasting
# gives entry [i, j] = (p[j] / p[i] - 1) * 100:
# rows are the start date, columns are the target date.
pct_change_matrix_np = (p / p[:, None] - 1) * 100

# Wrap the array back into a DataFrame labelled by date on both axes
pct_change_df = pd.DataFrame(
    pct_change_matrix_np,
    index=close_prices.index,
    columns=close_prices.index
)


date,2020-01-02,2020-01-03,2020-01-06,2020-01-07,2020-01-08,2020-01-09,2020-01-10,2020-01-13,2020-01-14,2020-01-15,...,2024-12-17,2024-12-18,2024-12-19,2024-12-20,2024-12-23,2024-12-24,2024-12-26,2024-12-27,2024-12-30,2024-12-31
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,0.000000,-0.915988,-0.277572,-0.291451,0.457994,1.309215,1.050148,2.215951,1.813472,1.855107,...,147.871947,138.929497,137.865470,139.942635,141.890266,145.170244,145.003701,141.746854,138.531643,136.505366
2020-01-03,0.924456,0.000000,0.644318,0.630311,1.386684,2.245775,1.984312,3.160893,2.754692,2.796713,...,150.163414,141.138295,140.064432,142.160799,144.126436,147.436735,147.268653,143.981698,140.736763,138.691755
2020-01-06,0.278345,-0.640193,0.000000,-0.013917,0.737614,1.591204,1.331416,2.500464,2.096864,2.138616,...,148.561885,139.594544,138.527556,140.610503,142.563555,145.852663,145.685656,142.419744,139.195584,137.163667
2020-01-07,0.292303,-0.626363,0.013919,0.000000,0.751636,1.605345,1.345520,2.514731,2.111075,2.152833,...,148.596483,139.627894,138.560757,140.643994,142.597318,145.886883,145.719853,142.453487,139.228878,137.196678
2020-01-08,-0.455906,-1.367718,-0.732213,-0.746028,0.000000,0.847341,0.589454,1.749942,1.349298,1.390744,...,146.741883,137.840203,136.781027,138.848722,140.787474,144.052498,143.886714,140.644716,137.444163,135.427124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,-59.212016,-59.585629,-59.325232,-59.330893,-59.025209,-58.678013,-58.783682,-58.308174,-58.472338,-58.455355,...,1.101970,-2.545475,-2.979470,-2.132236,-1.337837,0.000000,-0.067930,-1.396332,-2.707752,-3.534229
2024-12-26,-59.184290,-59.558157,-59.297583,-59.303248,-58.997356,-58.649924,-58.755665,-58.279834,-58.444109,-58.427115,...,1.170695,-2.479230,-2.913520,-2.065710,-1.270770,0.067976,0.000000,-1.329305,-2.641616,-3.468656
2024-12-27,-58.634415,-59.013319,-58.749235,-58.754976,-58.444963,-58.092851,-58.200015,-57.717774,-57.884262,-57.867039,...,2.533680,-1.165416,-1.605557,-0.746326,0.059323,1.416105,1.347214,0.000000,-1.329991,-2.168172
2024-12-30,-58.076841,-58.460852,-58.193208,-58.199026,-57.884835,-57.527977,-57.636586,-57.147844,-57.316576,-57.299121,...,3.915750,0.166793,-0.279281,0.591532,1.408041,2.783111,2.713291,1.347918,0.000000,-0.849479


In [16]:
# Look up one return in the matrix: rows hold the start date and columns hold
# the target date, so the lookup is [start date, target date]
start_date = pd.Timestamp('2020-01-02')
target_date = pd.Timestamp('2024-12-31')
pct_change_df.loc[start_date, target_date]

136.50536639526277

# Sauvegarde

In [26]:
# Write the return matrix to CSV, keeping the date index as the first column
output_path = '../csv/matrice_rendement_N100.csv'
pct_change_df.to_csv(output_path, index=True)