In [2]:
# Generate timestamps from 8 AM to 11:45 PM on September 23, 2024
from datetime import datetime, timedelta
import requests
import zipfile
import io
import pandas as pd

# Window to cover, walked in 15-minute steps with both ends included
start_time = datetime(2024, 9, 23, 8, 0, 0)
end_time = datetime(2024, 9, 23, 23, 45, 0)
step = timedelta(minutes=15)

# Number of 15-minute slots from start_time through end_time (inclusive)
slot_count = (end_time - start_time) // step + 1

# One YYYYMMDDHHMMSS string per slot
timestamps = [
    (start_time + slot * step).strftime("%Y%m%d%H%M%S")
    for slot in range(slot_count)
]

# Column names for the tab-separated GKG fields (refer to GDELT documentation).
# Defined once here instead of being rebuilt for every downloaded file.
gkg_columns = [
    "GKGRECORDID", "DATE", "SourceCollectionIdentifier", "SourceCommonName",
    "DocumentIdentifier", "Counts", "V2Counts", "Themes", "V2Themes",
    "Locations", "V2Locations", "Persons", "V2Persons", "Organizations",
    "V2Organizations", "V2Tone", "Dates", "GCAM", "SharingImage",
    "RelatedImages", "SocialImageEmbeds", "SocialVideoEmbeds", "Quotations",
    "AllNames", "Amounts", "TranslationInfo", "Extras"
]

# Names given to the comma-separated components of V2Tone, in order
tone_columns = ['Tone', 'Positive Score', 'Negative Score', 'Polarity', 'Activity', 'Self Direction', 'WordCount']
# Components converted to float; WordCount is left as parsed
numeric_columns = ['Tone', 'Positive Score', 'Negative Score', 'Polarity', 'Activity', 'Self Direction']

# Create a dictionary to store average Tone for each timestamp
# (NaN marks a slot with no usable archive or no Netflix rows)
avg_tones = {}

# Loop through timestamps, download, process, and discard each file
for timestamp in timestamps:
    url = f"http://data.gdeltproject.org/gdeltv2/{timestamp}.gkg.csv.zip"
    # Bounded wait so a stalled request cannot hang the cell. The whole body is
    # read through response.content below, so streaming is not requested.
    response = requests.get(url, timeout=60)
    if not response.ok:
        # An error response is not a zip archive; record a gap for this slot
        # instead of failing later with BadZipFile.
        print(f"Skipping {timestamp}: HTTP {response.status_code}")
        avg_tones[timestamp] = float('nan')
        continue

    # Process the zip file in memory without extracting to disk
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
        for file_name in zip_ref.namelist():
            if not file_name.endswith(".gkg.csv"):
                continue

            with zip_ref.open(file_name) as file:
                df = pd.read_csv(file, sep='\t', header=None,
                                 on_bad_lines='skip',  # Skip lines with errors
                                 engine='python',  # Python parser engine (slower than the default C engine)
                                 encoding='latin-1')  # Explicitly set encoding to 'latin-1'
            df.columns = gkg_columns

            # Filter for news related to Netflix (case-sensitive substring match)
            netflix_news = df[df['Organizations'].str.contains("netflix", na=False)].copy()

            if netflix_news.empty:
                # No Netflix rows in this slot: splitting an empty column yields
                # zero columns, which cannot be assigned to the tone columns.
                avg_tones[timestamp] = float('nan')
                continue

            # Extract Tone components; reindex pads or trims to the expected
            # number of parts so the column assignment always lines up.
            tone_parts = netflix_news['V2Tone'].str.split(',', expand=True)
            tone_parts = tone_parts.reindex(columns=range(len(tone_columns)))
            tone_parts.columns = tone_columns
            for column in numeric_columns:
                # Unparseable values become NaN instead of staying as strings,
                # so the mean below is always computed on numbers.
                tone_parts[column] = pd.to_numeric(tone_parts[column], errors='coerce').round(3)
            netflix_news[tone_columns] = tone_parts

            # Calculate and store average Tone
            avg_tones[timestamp] = netflix_news['Tone'].mean()

    # The in-memory archive is released when the 'with' block exits

# Build the tone table straight from avg_tones: the dictionary keys are parsed
# into datetime objects for 'Timestamp', the values become 'AvgTone'
tone_df = pd.DataFrame({
    'Timestamp': pd.to_datetime(list(avg_tones.keys()), format='%Y%m%d%H%M%S'),
    'AvgTone': list(avg_tones.values()),
})
tone_df

Unnamed: 0,Timestamp,AvgTone
0,2024-09-23 08:00:00,-0.531833
1,2024-09-23 08:15:00,-3.236250
2,2024-09-23 08:30:00,-0.025667
3,2024-09-23 08:45:00,1.060857
4,2024-09-23 09:00:00,-2.863167
...,...,...
59,2024-09-23 22:45:00,0.120400
60,2024-09-23 23:00:00,0.489000
61,2024-09-23 23:15:00,-0.892667
62,2024-09-23 23:30:00,1.537625


In [5]:
# Download the intraday data using yfinance
import yfinance as yf

from datetime import datetime, timedelta

# Request a 59-day window ending today. Yahoo's intraday history is limited
# (about the last 60 days per the yfinance documentation), so 59 days stays
# inside that limit.
end_date = datetime.today()
start_date = end_date - timedelta(days=59)

nflx_intraday = yf.download(
    tickers='NFLX',
    start=start_date.strftime('%Y-%m-%d'),
    end=end_date.strftime('%Y-%m-%d'),
    interval='15m',
    prepost=True,      # include pre-market and after-hours bars
    auto_adjust=True,  # pin adjusted prices explicitly instead of relying on the library default
)

# Keep a local copy of the downloaded bars
nflx_intraday.to_csv("netflix_intraday_latest.csv")


# Display DataFrame
nflx_intraday

  nflx_intraday = yf.download(
[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,NFLX,NFLX,NFLX,NFLX,NFLX
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-05-06 08:00:00+00:00,1128.47,1128.75,1126.0600,1128.75,0
2025-05-06 08:15:00+00:00,1125.22,1128.47,1125.2200,1128.47,0
2025-05-06 08:30:00+00:00,1127.07,1127.17,1124.5200,1125.05,0
2025-05-06 08:45:00+00:00,1126.51,1126.79,1126.5100,1126.79,0
2025-05-06 09:00:00+00:00,1125.21,1126.60,1125.2100,1126.51,0
...,...,...,...,...,...
2025-07-03 19:45:00+00:00,1295.21,1295.82,1294.8401,1295.33,0
2025-07-03 20:00:00+00:00,1295.21,1295.82,1294.6000,1295.21,0
2025-07-03 20:15:00+00:00,1295.33,1295.82,1294.8400,1294.90,0
2025-07-03 20:30:00+00:00,1295.11,1295.82,1294.8400,1295.33,0


In [7]:
# Express the price index in UTC (the index must already be timezone-aware
# for tz_convert to succeed)
price_index = pd.to_datetime(nflx_intraday.index)
nflx_intraday.index = price_index.tz_convert('UTC')
nflx_intraday

Price,Close,High,Low,Open,Volume
Ticker,NFLX,NFLX,NFLX,NFLX,NFLX
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-05-06 08:00:00+00:00,1128.47,1128.75,1126.0600,1128.75,0
2025-05-06 08:15:00+00:00,1125.22,1128.47,1125.2200,1128.47,0
2025-05-06 08:30:00+00:00,1127.07,1127.17,1124.5200,1125.05,0
2025-05-06 08:45:00+00:00,1126.51,1126.79,1126.5100,1126.79,0
2025-05-06 09:00:00+00:00,1125.21,1126.60,1125.2100,1126.51,0
...,...,...,...,...,...
2025-07-03 19:45:00+00:00,1295.21,1295.82,1294.8401,1295.33,0
2025-07-03 20:00:00+00:00,1295.21,1295.82,1294.6000,1295.21,0
2025-07-03 20:15:00+00:00,1295.33,1295.82,1294.8400,1294.90,0
2025-07-03 20:30:00+00:00,1295.11,1295.82,1294.8400,1295.33,0


In [9]:
import plotly.graph_objects as go

def _price_series(field):
    """Return the `field` price column of nflx_intraday as a 1-D Series."""
    values = nflx_intraday[field]
    # With (Price, Ticker) MultiIndex columns, selecting a price field yields a
    # one-column DataFrame; the candlestick trace needs 1-D data per field.
    if isinstance(values, pd.DataFrame):
        values = values.iloc[:, 0]
    return values


# Create candlestick trace
candlestick_trace = go.Candlestick(x=nflx_intraday.index,
                                   open=_price_series('Open'),
                                   high=_price_series('High'),
                                   low=_price_series('Low'),
                                   close=_price_series('Close'),
                                   name='Netflix Price')

# Create tone trace
# NOTE(review): tone_df is built for 2024-09-23 only, while nflx_intraday covers
# the most recent ~59 days, so the two traces may not overlap on the x-axis.
# Confirm the intended date ranges.
tone_trace = go.Scatter(x=tone_df['Timestamp'],
                        y=tone_df['AvgTone'],
                        mode='lines',
                        name='Average Tone',
                        line=dict(color='blue', width=1),
                        yaxis='y2')  # Assign to secondary y-axis

# Create figure with both traces
fig = go.Figure(data=[candlestick_trace, tone_trace])

# Update layout with secondary y-axis drawn over the price axis on the right
fig.update_layout(title_text='Netflix Price and Average Tone',
                  yaxis_title='Price (USD)',
                  yaxis2=dict(title='Average Tone',
                              overlaying='y',
                              side='right'))
fig.show()