# Netflix Stock Price Analysis – Data Visualization Portfolio

This project visualizes Netflix's stock performance to reveal trends, volatility, and relationships with market indices and competitors.

Key questions addressed:

- What are the main trends in Netflix's stock price?
- How volatile is Netflix's stock?
- How often does Netflix beat analyst EPS estimates?
- How does Netflix's stock correlate with major US indices?
- How does Netflix's stock compare to competitors like Disney and Spotify?


In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

def clean_stock_data(df):
    """Return a copy of ``df`` with 'Close' renamed to 'Price' and floats rounded.

    Only columns whose dtype is ``float64`` are rounded (to two decimals);
    every other column is passed through untouched. The input frame is not
    modified because ``rename`` already hands back a new object.
    """
    renamed = df.rename(columns={'Close': 'Price'})
    float64_names = renamed.select_dtypes(include='float64').columns
    # A per-column decimals mapping rounds just the float64 columns and
    # leaves the remaining columns as they are.
    return renamed.round(dict.fromkeys(float64_names, 2))

def get_year_month(df):
    """Add 'Year', 'Month' and 'Month-Year' string columns derived from 'Date'.

    'Date' is split on '-'; the first piece becomes 'Year' and the second
    'Month'. 'Month-Year' joins them back as ``<Month>-<Year>``. The columns
    are written onto ``df`` itself, and that same object is returned.
    """
    date_parts = df['Date'].str.split('-', expand=True)
    df['Year'], df['Month'] = date_parts[0], date_parts[1]
    df['Month-Year'] = df['Month'] + '-' + df['Year']
    return df

def plot_monthly_prices(df, title, color):
    """Draw a line chart of 'Price' against 'Month-Year' and show it.

    ``title`` is used as the chart title and ``color`` as the line colour.
    """
    plt.figure(figsize=(12, 5), dpi=180)
    axes = plt.gca()
    axes.set_title(title)
    axes.plot(df['Month-Year'], df['Price'], color=color)
    axes.set_xlabel('Date')
    axes.set_ylabel('Price (USD)')
    # Rotate the x tick labels so neighbouring month labels do not overlap.
    axes.tick_params(axis='x', labelsize='small', labelrotation=-70)
    plt.show()

def plot_daily_return_distribution(df):
    """Show a histogram of 'Daily Pct Return' with mean and +/-1..3 std markers.

    The mean is drawn as a red dashed vertical line; each standard-deviation
    band is drawn as a pair of dashed lines in its own shade of blue.
    """
    returns = df['Daily Pct Return']
    center = returns.mean()
    spread = returns.std()

    plt.figure(figsize=(12, 6), dpi=150)
    ax = sns.histplot(data=df, x='Daily Pct Return', color='#40B0A6')
    ax.set(title="Netflix Distribution of Daily Returns (2020 - 2022)", xlabel="Daily Returns (%)", ylabel="Count")

    def draw_vertical(x_pos, **style):
        # Two points sharing the same x define a vertical line for axline.
        ax.axline((x_pos, 0), (x_pos, 1), linestyle='--', **style)

    draw_vertical(center, color='red', label='Mean Daily Return (%)')
    band_shades = {1: 'lightblue', 2: 'blue', 3: 'darkblue'}
    for i, shade in band_shades.items():
        # Only the upper line carries a label so each band appears once in the legend.
        draw_vertical(center + i*spread, color=shade, label=f'Mean +/- {i} Std')
        draw_vertical(center - i*spread, color=shade)
    plt.legend()
    plt.show()

def plot_monthly_boxplots(df):
    """Show three stacked box plots of 'Daily Pct Return' by 'Month'.

    One panel is drawn for each of the years '2020', '2021' and '2022';
    rows are selected by comparing the 'Year' column to those strings.
    """
    years = ('2020', '2021', '2022')
    plt.figure(figsize=(6, 8), tight_layout=True, dpi=150)
    for panel_no, year in zip(range(1, 4), years):
        panel = plt.subplot(3, 1, panel_no)
        rows_for_year = df[df['Year'] == year]
        sns.boxplot(x='Month', y='Daily Pct Return', data=rows_for_year)
        panel.set(title=f"Netflix Daily Returns by Month ({year})", xlabel="Month", ylabel="Daily Returns (%)")
    plt.show()

def calc_yearly_pct_change(df):
    """Return one row per 'Year' with its opening price, closing price and % change.

    For each year, 'Open' is the first 'Open' value in the group and 'Close'
    is the last 'Price' value in the group. 'Yearly Pct Change' is
    ``(Close - Open) / Open * 100``.
    """
    summary = df.groupby('Year', as_index=False).agg(
        Open=('Open', 'first'),
        Close=('Price', 'last'),
    )
    change = summary['Close'].sub(summary['Open'])
    summary['Yearly Pct Change'] = change.div(summary['Open']).mul(100)
    return summary

def clean_eps_data(df):
    """Convert EPS strings to floats and add surprise and quarter columns.

    'Estimated' and 'Actual' have their '$' characters removed and are cast
    to float. 'Pct Diff' is the percentage by which 'Actual' differs from
    'Estimated'. 'Year' and 'Month' come from ``get_year_month``; months
    '01', '04', '07' and '10' are relabelled 'Q1'..'Q4' in 'Quarter' (other
    month values are left as they are), and 'Quarter-Year' joins quarter and
    year with '-'. The columns are written onto the frame that was passed in.
    """
    for eps_col in ('Estimated', 'Actual'):
        df[eps_col] = df[eps_col].str.replace('$', '', regex=False).astype(float)

    surprise = df['Actual'] - df['Estimated']
    df['Pct Diff'] = surprise / df['Estimated'] * 100

    df = get_year_month(df)
    month_to_quarter = {'01': 'Q1', '04': 'Q2', '07': 'Q3', '10': 'Q4'}
    df['Quarter'] = df['Month'].replace(month_to_quarter)
    df['Quarter-Year'] = df['Quarter'] + '-' + df['Year']
    return df

def plot_eps_bar(df):
    """Show side-by-side bars of 'Estimated' and 'Actual' EPS per row of ``df``.

    Bars are positioned using ``df.index``; the 'Actual' bar is shifted right
    by one bar width, and the tick for each pair sits midway between the two
    bars and is labelled with 'Quarter-Year'.
    """
    bar_width = 0.4
    estimated_x = df.index
    actual_x = estimated_x + bar_width
    tick_x = estimated_x + bar_width / 2

    plt.figure(figsize=(12, 6), dpi=150)
    axes = plt.gca()
    axes.bar(estimated_x, df['Estimated'], width=bar_width, label='Estimated EPS', color='#E1B16A')
    axes.bar(actual_x, df['Actual'], width=bar_width, label='Actual EPS', color='#40B0A6')
    axes.set_title('Netflix Historical EPS Estimates vs. Actual')
    axes.set_xlabel('Report Date')
    axes.set_ylabel('Earnings Per Share (EPS)')
    plt.xticks(tick_x, df['Quarter-Year'].astype(str), rotation=-70)
    axes.legend()
    plt.show()

def plot_eps_pct_diff(df):
    """Show a bar chart of 'Pct Diff' for each 'Quarter-Year' in ``df``.

    A thin black horizontal line marks zero so bars above and below it are
    easy to tell apart.
    """
    plt.figure(figsize=(8, 4), dpi=140)
    chart = sns.barplot(data=df, x='Quarter-Year', y='Pct Diff', color='#91B188')
    chart.set(title="Netflix Historical EPS Estimates vs. Actual Difference", xlabel="Report Date", ylabel="Percent Difference")
    chart.axhline(0, linewidth=1.0, color='black')
    chart.tick_params(axis='x', labelsize='small', labelrotation=-70)
    plt.show()

def plot_indices(nflx, dow, sp500, nasdaq):
    """Show four stacked line charts of 'Price' against 'Month-Year'.

    The panels are, top to bottom: ``nflx``, ``dow``, ``sp500`` and
    ``nasdaq``, each with its own title and line colour.
    """
    panels = (
        (nflx, "Netflix Stock Price (2020-2022)", "#E50914"),
        (dow, "Dow Jones Index (2020-2022)", "slategray"),
        (sp500, "S&P 500 Index (2020-2022)", "darkblue"),
        (nasdaq, "Nasdaq Composite Index (2020-2022)", "orange"),
    )
    plt.figure(figsize=(10, 14), tight_layout=True, dpi=200)
    for row, (frame, heading, line_color) in enumerate(panels, start=1):
        panel = plt.subplot(4, 1, row)
        sns.lineplot(data=frame, x='Month-Year', y='Price', color=line_color, ax=panel)
        panel.set(title=heading, xlabel="Date", ylabel="Price (USD)")
        panel.tick_params(axis='x', labelrotation=-70)
    plt.show()

def _price_column(df):
    """Return 'Price' when ``df`` has no 'Close' column but has 'Price', else 'Close'."""
    if 'Close' not in df.columns and 'Price' in df.columns:
        return 'Price'
    return 'Close'


def plot_price_ratio(nflx, other, ratio_col, other_label, mean, std, upper, lower, annotations):
    """Show Netflix and a competitor's prices above their price-ratio chart.

    The top panel (two thirds of the figure) plots both price series against
    'Date'. The bottom panel plots ``nflx[ratio_col]`` with dashed horizontal
    lines at ``mean``, ``upper`` and ``lower``, plus text annotations.

    Args:
        nflx: Netflix frame with 'Date', a price column and ``ratio_col``.
        other: Competitor frame with 'Date' and a price column.
        ratio_col: Name of the ratio column in ``nflx``.
        other_label: Competitor name used in titles and the legend.
        mean: y position of the 'Mean Ratio' line.
        std: Accepted for call compatibility; not used for drawing.
        upper: y position of the upper band line.
        lower: y position of the lower band line.
        annotations: Dict with 'color' (competitor line colour) and 'points',
            a list of dicts each holding 'text' and 'xy'.
    """
    plt.figure(figsize=(12, 10), tight_layout=True, dpi=180)

    # clean_stock_data renames 'Close' to 'Price', so a frame may carry either
    # name; resolve it per frame instead of assuming 'Close' on both.
    ax1 = plt.subplot(3, 1, (1, 2))
    sns.lineplot(data=nflx, x='Date', y=_price_column(nflx), label='Netflix Price', color='#E50914', ax=ax1)
    sns.lineplot(data=other, x='Date', y=_price_column(other), label=f'{other_label} Price', color=annotations['color'], ax=ax1)
    ax1.set(title=f"Netflix / {other_label} Stock Prices (2020-2022)", xlabel="Date", ylabel="Price (USD)")

    ax2 = plt.subplot(3, 1, 3)
    sns.lineplot(data=nflx, x='Date', y=ratio_col, label='Price Ratio', color='gray', ax=ax2)
    ax2.axhline(mean, linestyle='--', label='Mean Ratio', color='black')
    # Only the upper band line is labelled so the band appears once in the legend.
    ax2.axhline(upper, linestyle='--', label='Mean Ratio +/- 1 Std', color='lightgray')
    ax2.axhline(lower, linestyle='--', color='lightgray')
    ax2.set(title=f"Netflix / {other_label} Price Ratio", xlabel="Date", ylabel="Price Ratio")
    for ann in annotations['points']:
        ax2.annotate(ann['text'], xy=ann['xy'], color='red')
    ax2.legend()
    plt.show()

# Load and clean data
netflix_daily_df = pd.read_csv('datasets/NFLX_daily_prices.csv')
netflix_monthly_df = pd.read_csv('datasets/NFLX_monthly_prices.csv')
netflix_daily_cleaned = get_year_month(clean_stock_data(netflix_daily_df))
netflix_monthly_cleaned = get_year_month(clean_stock_data(netflix_monthly_df))

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
netflix_daily_df.info()
netflix_monthly_df.info()

plot_monthly_prices(netflix_monthly_cleaned, 'Netflix Stock Price (2020 - 2022)', "#E50914")

# The return plots below read a 'Daily Pct Return' column that nothing above
# creates, so derive it from the price when the CSV does not supply it.
# NOTE(review): assumes the daily rows are in chronological order - confirm
# against the dataset.
if 'Daily Pct Return' not in netflix_daily_cleaned.columns:
    netflix_daily_cleaned['Daily Pct Return'] = netflix_daily_cleaned['Price'].pct_change() * 100
# A bare expression shows nothing outside the last line of a notebook cell.
print(netflix_daily_cleaned['Daily Pct Return'].describe())
plot_daily_return_distribution(netflix_daily_cleaned)
plot_monthly_boxplots(netflix_daily_cleaned)

netflix_yearly_returns = calc_yearly_pct_change(netflix_monthly_cleaned)
print(netflix_yearly_returns)

nflx_eps = pd.read_csv('datasets/NFLX_eps_history.csv')
nflx_eps = clean_eps_data(nflx_eps)
print(nflx_eps)
plot_eps_bar(nflx_eps)
plot_eps_pct_diff(nflx_eps)

# Load indices
# The Netflix monthly file was already loaded and cleaned above; work on a
# copy so the columns added further down do not leak into that frame.
nflx_monthly = netflix_monthly_cleaned.copy()
dow_monthly = get_year_month(clean_stock_data(pd.read_csv('datasets/DJI_monthly_prices.csv')))
sp500_monthly = get_year_month(clean_stock_data(pd.read_csv('datasets/SP500_monthly_prices.csv')))
nasdaq_monthly = get_year_month(clean_stock_data(pd.read_csv('datasets/NASDAQ_monthly_prices.csv')))
plot_indices(nflx_monthly, dow_monthly, sp500_monthly, nasdaq_monthly)

# Load competitors
dis_monthly = pd.read_csv('datasets/DIS_monthly_prices.csv')
spot_monthly = pd.read_csv('datasets/SPOT_monthly_prices.csv')
# astype('datetime64') without a unit is rejected by pandas 2.x;
# pd.to_datetime parses the date strings on old and new versions alike.
nflx_monthly['Date'] = pd.to_datetime(nflx_monthly['Date'])
dis_monthly['Date'] = pd.to_datetime(dis_monthly['Date'])
spot_monthly['Date'] = pd.to_datetime(spot_monthly['Date'])

# clean_stock_data renamed 'Close' to 'Price' on the Netflix frame, but the
# ratio calculations below and plot_price_ratio read 'Close'. Restore it as
# an alias so those lookups succeed.
nflx_monthly['Close'] = nflx_monthly['Price']

# The division aligns rows by index label.
# NOTE(review): assumes all three monthly files cover the same months in the
# same row order - confirm against the datasets.
nflx_monthly['DIS_ratio'] = nflx_monthly['Close'] / dis_monthly['Close']
nflx_dis_mean_ratio = nflx_monthly['DIS_ratio'].mean()
nflx_dis_std_ratio = nflx_monthly['DIS_ratio'].std()
nflx_dis_upper_bound = nflx_dis_mean_ratio + nflx_dis_std_ratio
nflx_dis_lower_bound = nflx_dis_mean_ratio - nflx_dis_std_ratio
print("Netflix/Disney Price Ratio Mean: ", nflx_dis_mean_ratio)
print("Netflix/Disney Price Ratio Standard Deviation: ", nflx_dis_std_ratio)

plot_price_ratio(
    nflx_monthly, dis_monthly, 'DIS_ratio', 'Disney',
    nflx_dis_mean_ratio, nflx_dis_std_ratio, nflx_dis_upper_bound, nflx_dis_lower_bound,
    {
        'color': '#113CCF',
        'points': [
            {'text': 'Mean Reversion', 'xy': (pd.to_datetime('2020-06-01'), 4.25)},
            {'text': 'Mean Reversion', 'xy': (pd.to_datetime('2021-10-01'), 4.43)},
            {'text': 'Mean Reversion', 'xy': (pd.to_datetime('2022-03-01'), 1.58)},
        ]
    }
)

nflx_monthly['SPOT_ratio'] = nflx_monthly['Close'] / spot_monthly['Close']
nflx_spot_mean_ratio = nflx_monthly['SPOT_ratio'].mean()
nflx_spot_std_ratio = nflx_monthly['SPOT_ratio'].std()
nflx_spot_upper_bound = nflx_spot_mean_ratio + nflx_spot_std_ratio
nflx_spot_lower_bound = nflx_spot_mean_ratio - nflx_spot_std_ratio
print("Netflix/Spotify Price Ratio Mean: ", nflx_spot_mean_ratio)
print("Netflix/Spotify Price Ratio Standard Deviation: ", nflx_spot_std_ratio)

plot_price_ratio(
    nflx_monthly, spot_monthly, 'SPOT_ratio', 'Spotify',
    nflx_spot_mean_ratio, nflx_spot_std_ratio, nflx_spot_upper_bound, nflx_spot_lower_bound,
    {
        'color': '#1DB954',
        'points': [
            {'text': 'Mean Reversion', 'xy': (pd.to_datetime('2020-02-01'), 3.1)},
            {'text': 'Mean Reversion', 'xy': (pd.to_datetime('2020-11-01'), 1.7)},
            {'text': 'Mean Reversion', 'xy': (pd.to_datetime('2022-03-01'), 1.7)},
            {'text': 'Potential \nMean Reversion (?)', 'xy': (pd.to_datetime('2022-09-01'), 3.85)},
        ]
    }
)
