In [1]:
import yfinance as yf
import pandas as pd

tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "NVDA"]
data = {}

for t in tickers:
    info = yf.Ticker(t)
    fin = info.financials.T  # trimestrali
    fin["Ticker"] = t
    data[t] = fin[["Total Revenue", "Research Development", "Ticker"]]

df = pd.concat(data.values())
df = df.reset_index().rename(columns={"index": "Date"})
df["Year"] = df["Date"].dt.year
df["R&D_ratio"] = df["Research Development"] / df["Total Revenue"]

KeyError: "['Research Development'] not in index"

In [None]:
df = df.sort_values(["Ticker", "Year"])
df["Revenue_5y_ahead"] = df.groupby("Ticker")["Total Revenue"].shift(-5)
df = df.dropna(subset=["R&D_ratio", "Revenue_5y_ahead"])

In [None]:
import statsmodels.api as sm

X = sm.add_constant(df["R&D_ratio"])
y = df["Revenue_5y_ahead"]
model = sm.OLS(y, X).fit()
print(model.summary())

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
plt.scatter(df["R&D_ratio"], df["Revenue_5y_ahead"] / 1e9, color="steelblue")
x_line = pd.Series(sorted(df["R&D_ratio"]))
y_line = model.params["const"] + model.params["R&D_ratio"] * x_line
plt.plot(x_line, y_line / 1e9, color="orange", label="Fit OLS")
plt.xlabel("Quota R&D / Fatturato (anno t)")
plt.ylabel("Fatturato (miliardi USD) a t+5")
plt.title("Relazione tra investimenti in R&D e crescita del fatturato")
plt.legend()
plt.tight_layout()
plt.show()