# News

In [137]:
!pip install google-api-python-client google-auth google-auth-oauthlib google-auth-httplib2 
!pip install --upgrade google-auth-oauthlib
!pip install openai



## Channel Definitions and Setups

In [138]:
# Lookup table: short channel label -> channel ID string.
channels = {
    "CNBC_TV": "UCrp_UI8XtuYfpiqluWLD7Lw",
    "DeepWater": "UCQCNLsdpDV1XSHH4V8WQuPA",
    "BloombergTechnology": "UCrM7B7SL_g1edFOnmj-SDKg",
    "YahooFinance": "UCEAZeUIeJs0IjQiqTCdVSIg",
}

# Function to get channel name from ID
def get_channel_name_by_id(channel_id, channels):
    """Return the first key of ``channels`` whose value equals ``channel_id``.

    Entries are scanned in the mapping's iteration order. ``None`` is returned
    when no value matches.
    """
    matching_labels = (
        label for label, known_id in channels.items() if known_id == channel_id
    )
    return next(matching_labels, None)

# Pick the channel to process; swap the key to target another entry of `channels`.
channel_id = channels["YahooFinance"]
channel_name = get_channel_name_by_id(channel_id, channels)

# Echo the selection so the cell output records which channel was used.
print(f"Channel ID: {channel_id}", f"Channel Name: {channel_name}", sep="\n")

Channel ID: UCEAZeUIeJs0IjQiqTCdVSIg
Channel Name: YahooFinance


In [139]:
# Fetch the videos published by the selected channel (`channel_id`) in the chosen period.
# The project-local helper module is imported here because this is the first cell
# that uses it; without the import the cell fails on a fresh kernel.
import fetch_videos

period_type = 'today'  # 'today', 'days', 'weeks', 'months'
number = 4  # The 'today' setting does not use 'number', adjust if using other settings
start_date, end_date = fetch_videos.get_date_range(period_type, number)
df_videos_today = fetch_videos.fetch_videos(start_date, end_date, channel_id)

In [140]:
# Candidate hour-of-day windows. None of them is passed to the calls in this cell.
# NOTE(review): presumably intended for an hour-range filter — confirm or remove.
hour_range1 = '0-8'
hour_range2 = '8-10'
hour_range3 = '10-12'
hour_range4 = '12-14'
hour_range5 = '14-16'
hour_range6 = '16-24'

# Filter the fetched videos (df_videos_today) using the 'today' period with number=1,
# then display the result. The exact filtering rules live in
# fetch_videos.filter_videos_by_date_and_time.
filtered_df = fetch_videos.filter_videos_by_date_and_time(df_videos_today, 'today', 1)
fetch_videos.display_df(filtered_df)

Title,Published At,Duration (Min),Video ID
Why the Fed can ‘print the money anyways’ 💸 #shorts #podcast,2024-08-01 10:04:16-05:00,1,6kpn4ZjBCLc


## Get video transcripts

In [141]:
# Add transcripts to the filtered DataFrame. The summary step later reads each
# row's text from a 'Transcript' column of this frame.
# NOTE(review): presumably add_transcripts_to_df is what creates that column — confirm.
df_videos_with_transcripts = fetch_videos.add_transcripts_to_df(filtered_df)

In [142]:
#fetch_videos.display_df(df_videos_with_transcripts)

In [143]:
#fetch_videos.save_df_to_html(df_videos_with_transcripts, 'videos_with_transcripts.html')

## AI Summary 

In [144]:
# Import necessary libraries
import openai
import os
import pandas as pd

# Read the OpenAI API key from the OPENAI_API_KEY environment variable so it is
# never hardcoded in the notebook. os.getenv returns None when the variable is unset.
openai.api_key = os.getenv('OPENAI_API_KEY')
# Client object handed to the summary helpers defined below.
# NOTE(review): OpenAI() is constructed without an explicit api_key; presumably it
# picks the key up from the environment itself — confirm for the installed version.
client = openai.OpenAI()

In [145]:
def get_summary(text, client, task, model="gpt-3.5-turbo", max_tokens=3000):
    """
    Asks the chat model to perform ``task`` on ``text`` and returns its reply.

    Parameters:
    - text (str): The text to process (e.g. a video transcript).
    - client (object): Client exposing ``chat.completions.create``.
    - task (str): Instruction inserted into the user prompt.
    - model (str): Chat model name. Defaults to the value that used to be hard-coded.
    - max_tokens (int): Upper bound on the reply length. Defaults to the value
      that used to be hard-coded.

    Returns:
    - str: The reply with surrounding whitespace removed; an empty string when
      the reply carries no text; or the placeholder
      "Context length exceeded. Summary not available." when the API reports
      that the model's maximum context length was exceeded.

    Raises:
    - openai.OpenAIError: Any other API error is propagated unchanged.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a financial analyst."},
                {"role": "user", "content": f"Please {task} for the following text:\n\n{text}"}
            ],
            max_tokens=max_tokens
        )
        # The reply's content may be None; treat that as an empty summary
        # instead of failing on None.strip().
        content = response.choices[0].message.content
        return (content or "").strip()
    except openai.OpenAIError as e:  # Catch the general OpenAIError
        # The context-length case is recognised by its error message text.
        if "maximum context length" in str(e):
            print("Warning: Context length exceeded for transcript. Returning an empty summary.")
            return "Context length exceeded. Summary not available."
        raise  # Re-raise other types of OpenAI errors with the original traceback

def get_summary_all_transcripts(df, client, task):
    """
    Generates a summary (or "No summary") for every transcript in ``df``.

    Parameters:
    - df (pandas.DataFrame): Frame with a 'Transcript' column. It is modified in
      place: a 'Summary' column is added or overwritten.
    - client (object): Client forwarded to ``get_summary``.
    - task (str): Task description forwarded to ``get_summary``.

    Returns:
    - pandas.DataFrame: The same ``df`` object, now carrying a 'Summary' column.
      A row gets "No summary" when its transcript is missing or blank, when the
      model returns nothing, or when the context length was exceeded.
    """
    # Create the column up front so it exists even when df has no rows;
    # otherwise code that later selects 'Summary' fails with a KeyError.
    if 'Summary' not in df.columns:
        df['Summary'] = None

    for index, row in df.iterrows():
        transcript = row['Transcript']

        # A missing (None/NaN) or blank transcript has nothing to summarize;
        # skip the API call rather than send "None"/"nan" to the model.
        if isinstance(transcript, str) and transcript.strip():
            summary = get_summary(transcript, client, task)
        else:
            summary = None

        # Check if the summary is empty or the placeholder string
        if not summary or summary == "Context length exceeded. Summary not available.":
            summary = "No summary"

        df.at[index, 'Summary'] = summary
    return df

# Prompt describing the summary to produce for each transcript. get_summary embeds
# it in the user message as "Please {task} for the following text: ...".
task = """I would like you to summarize the transcript with the following instructions: 
First categorize the video content.The category should be one of the following: Crypto, Macro, Politics, Technology Stocks, or Other. 
Then summarize the stocks that are mentioned in this video.
Then provide key takeaways in a bullet point format.
Finally, use one sentence to summarize the overall sentiment of the stocks mentioned in the video.
Please print the summary in a human-readable format like the following: 
Category: Technology Stocks
Stock mentioned: Nvidia, Tesla, Apple
Key takeaways:
* Nvidia reported record revenue growth.
* Tesla announced a new electric vehicle model.
* Apple released a new software update which will make its products more secure.
Sentiment: Cautiously Positive
"""

# Get summaries for all transcripts.
# get_summary_all_transcripts writes the 'Summary' column into the DataFrame it is
# given and returns that same object, so df_summaries and
# df_videos_with_transcripts refer to one DataFrame.
df_summaries = get_summary_all_transcripts(df_videos_with_transcripts, client, task)

In [146]:
# Persist the summaries to a per-channel CSV (row index omitted). Later cells read
# this file back through summary_file_name.
summary_file_name = f'summaries_{channel_name}.csv'
df_summaries.to_csv(summary_file_name, index=False)

In [147]:
# Display only the 'Title' and 'Summary' columns, leaving out the original transcripts.
df_summaries_display = df_summaries[['Title', 'Summary']]
df_summaries_display

Unnamed: 0,Title,Summary
0,Why the Fed can ‘print the money anyways’ 💸 #s...,Category: Macro\n\nStock mentioned: None\n\nKe...


## Send emails

In [148]:
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import pandas as pd
import os
import fetch_videos

# Sender credentials come from the environment so they never live in the notebook.
email_user = os.getenv('EMAIL_USER')
email_password = os.getenv('EMAIL_PASSWORD')
email_send1 = 'sliu810@gmail.com'
email_send2 = 'zhengwang827@gmail.com'

# Addresses the message is actually delivered to; add email_send2 here to include it.
recipients = [email_send1]

# Set up the MIME
message = MIMEMultipart()
message['From'] = email_user
# Derive the To header from `recipients` so the visible header always matches the
# envelope recipients handed to sendmail.
message['To'] = ', '.join(recipients)
message['Subject'] = f'summaries_{channel_name}_{fetch_videos.get_formated_date_today()}'

# load df_summaries from disk and get the html content
df_summaries = pd.read_csv(summary_file_name)
html_content = fetch_videos.get_html_content_summary_only(df_summaries)

# Attach the HTML content to the email
message.attach(MIMEText(html_content, 'html'))

# Function to send email
def send_email():
    """
    Sends the prepared ``message`` to ``recipients`` via the Outlook SMTP server.

    Reads the module-level ``message``, ``recipients``, ``email_user`` and
    ``email_password``. Prints the outcome; failures are reported with a
    printed message instead of being raised.
    """
    try:
        # The context manager sends QUIT and closes the socket even when
        # starttls/login/sendmail raises, so a failed send does not leak the
        # connection. The timeout keeps an unreachable server from hanging the cell.
        with smtplib.SMTP('smtp-mail.outlook.com', 587, timeout=30) as server:  # Outlook SMTP server
            #server.set_debuglevel(1)  # Enable debugging output
            server.starttls()
            server.login(email_user, email_password)
            server.sendmail(email_user, recipients, message.as_string())
        print("Email sent successfully!")
    except Exception as e:
        # Deliberate best-effort: report the failure without aborting the notebook run.
        print(f"Failed to send email: {e}")

# Send the email. send_email() prints the outcome; errors it catches are printed
# rather than raised.
send_email()

Email sent successfully!


In [149]:
import fetch_videos

# Reload the saved summaries and render them as HTML.
df_summaries = pd.read_csv(summary_file_name)
html_content = fetch_videos.get_html_content_summary_only(df_summaries)
print(html_content)

summary_file_name_html = f'summaries_{channel_name}_{fetch_videos.get_formated_date_today()}.html'

# Save the HTML content to an HTML file.
# Write UTF-8 explicitly: titles and summaries can contain non-ASCII characters
# (emoji, curly quotes) that the platform's default encoding may fail to encode.
with open(summary_file_name_html, 'w', encoding='utf-8') as file:
    file.write(html_content)

<div><h3><a href="https://www.youtube.com/watch?v=6kpn4ZjBCLc" target="_blank">Why the Fed can ‘print the money anyways’ 💸 #shorts #podcast</a></h3><p><br><b>Category:</b> Macro</li></li>Stock mentioned: None</li></li><br><b>Key takeaways:</b><ul></li>* The Federal Reserve Bank was created in 1913, giving it the power to monitor the monetary supply by printing money and changing interest rates.</li>* The US dollar is no longer backed by gold, allowing the government to spend more money without the need for additional gold reserves as collateral.</li>* This system can lead to inflation due to increased spending without traditional asset backing.</li></li>Sentiment: Concerns raised about potential inflation due to the government's ability to increase spending without traditional asset backing.</li></ul></p></div>



## Random one-off experiment

In [150]:
# print(transcript)

In [151]:
# transcript = fetch_videos.get_transcript('GabBf791bdY')
# print(transcript)
# # Example task
# task2 = """I would like you to summarize the text which describes Tesla FSD.
# Please say in what senario FSD performed well and waht senario it didn't perform well.
# Format like this:
# Title: 
# Key takeaways:
# * FSD performed well in ...
# * FSD had some challenges in ....
# Improvement over pervious version if any.
# """
# task3 = """could you print the transcript with two person speaking into a human readible format and maintain the original content?"""

# summary = get_summary(transcript, client, task3)
# print(summary)

In [152]:
def get_summary_in_chunks(transcript, client, task, chunk_size=500):
    """
    Summarizes a transcript piece by piece and concatenates the results.

    The transcript is split with ``textwrap.wrap`` using ``chunk_size`` as the
    width. Each piece is passed to ``get_summary``, and every returned summary
    is appended to the result followed by a newline.

    Parameters:
    - transcript (str): The full transcript text.
    - client (object): The client forwarded to ``get_summary``.
    - task (str): The task description forwarded to ``get_summary``.
    - chunk_size (int): Width passed to ``textwrap.wrap`` for each piece.

    Returns:
    - str: The per-piece summaries in order, each terminated by a newline
      (an empty string when the transcript yields no pieces).
    """
    from textwrap import wrap

    pieces = wrap(transcript, chunk_size)
    return "".join(get_summary(piece, client, task) + "\n" for piece in pieces)

In [153]:
# transcript = fetch_videos.get_transcript('GabBf791bdY')
# # print(transcript)
# # Example task
# task2 = """I would like you to summarize the text which describes Tesla FSD.
# Please say in what senario FSD performed well and waht senario it didn't perform well.
# Format like this:
# Title: 
# Key takeaways:
# * FSD performed well in ...
# * FSD had some challenges in ....
# Improvement over pervious version if any.
# """
# task3 = """tell what the transcripts say about capex"""

# summary = get_summary(transcript, client, task3)
# print(summary)