# YouTube Study

In [27]:
# !pip install google-api-python-client google-auth google-auth-oauthlib google-auth-httplib2 
# !pip install --upgrade google-auth-oauthlib
# !pip install openai

In [28]:
# Imports and APIs
import fetch_videos
import openai
import os
import pandas as pd
# The OpenAI API key is read from the OPENAI_API_KEY environment variable;
# make sure it is exported before this cell runs.
openai.api_key = os.environ.get('OPENAI_API_KEY')
client = openai.OpenAI()

## Channel Definitions and Setups

# Daily News Videos

In [29]:
## Select the channel and derive today's output file names
channel_id = fetch_videos.channels["CNBC_TV"]  # Change the key to select another channel
channel_name = fetch_videos.get_channel_name_by_id(channel_id, fetch_videos.channels)

# Common stem "summaries_<channel name>_<today's date>" shared by the CSV and HTML outputs
summary_file_name_today = 'summaries_{}_{}'.format(
    channel_name,
    fetch_videos.get_formated_date_today(),
)
summary_file_name_today_cvs = summary_file_name_today + '.csv'
summary_file_name_today_html = summary_file_name_today + '.html'

In [30]:


## Fetch the channel's videos for the chosen period
period_type = 'today'  # one of: 'today', 'days', 'weeks', 'months'
number = 1  # not used by the 'today' setting; adjust when using the other settings
start_date, end_date = fetch_videos.get_date_range(period_type, number)
df_videos_today = fetch_videos.fetch_videos(
    start_date,
    end_date,
    channel_id,
    summary_file_name_today_cvs,
)

# ## Filter videos by time (currently disabled)
# # Assume df_videos is your initial DataFrame loaded with video data
# hour_range1 = '0-8'
# hour_range2 = '8-10'
# hour_range3 = '10-12'
# hour_range4 = '12-14'
# hour_range5 = '14-16'
# hour_range6 = '16-24'

# # Filter videos by date and time (the call below uses 'today', 1)
# filtered_df = fetch_videos.filter_videos_by_date_and_time(df_videos_today, 'today', 1)
# # fetch_videos.display_df(filtered_df)

## Attach transcripts, then write the resulting frame to today's CSV
df_videos_with_transcripts = fetch_videos.add_transcripts_to_df(df_videos_today)
# fetch_videos.display_df(df_videos_with_transcripts,include_video_id=True,include_transcript=True)
df_videos_with_transcripts.to_csv(summary_file_name_today_cvs, index=False)

Appended 2 new videos to summaries_CNBC_TV_2024-08-21.csv.


### AI Summary 

In [31]:
## Prompt applied to every transcript: category, stocks mentioned, key takeaways
task_summary = """I would like you to summarize the transcript with the following instructions: 
First categorize the video content.The category should be one of the following: Crypto, Macro, Politics, Technology, Small Caps or Other. 
Then summarize the stocks that are mentioned in this video.
Then provide key takeaways in a bullet point format.Please make sure don't miss anything about small cap, Nvidia, Tesla, Meta and Macro is mentioned in the transcript.
Please print the summary in a human-readable format like the following: 
Category: Technology
Stock mentioned: STOCK1, STOCK2, STOCK3
Key takeaways:
* takeaway 1
* takeaway 2
* takeaway 3
"""

# Re-read the videos (with transcripts) from today's CSV so this cell
# does not depend on the in-memory frame from the previous cell
df_videos_with_transcripts = pd.read_csv(summary_file_name_today_cvs)

# Apply the summary prompt to all transcripts
df_summaries = fetch_videos.apply_tasks_on_all_transcripts(
    df_videos_with_transcripts,
    client,
    task_summary,
)

# Write the summarized frame back to the same CSV (overwrites it)
df_summaries.to_csv(summary_file_name_today_cvs, index=False)

In [32]:
## Render the saved summaries as HTML and write them to today's HTML file
df_summaries = pd.read_csv(summary_file_name_today_cvs)
html_content = fetch_videos.get_html_content_summary_only(df_summaries)
# print(html_content)

# Write with an explicit UTF-8 encoding: without it, open() uses the
# platform's locale encoding, which can fail or garble non-ASCII characters
# in the generated HTML on systems whose default is not UTF-8.
with open(summary_file_name_today_html, 'w', encoding='utf-8') as file:
    file.write(html_content)

In [33]:
# Write a text file from df_summaries, passing the "Title" and "Summary"
# column names to the helper
summary_file_name_today_txt = summary_file_name_today + '.txt'
fetch_videos.save_videos_to_text(
    df_summaries,
    summary_file_name_today_txt,
    "Title",
    "Summary",
)

In [34]:
# Follow-up question asked over the combined text file written earlier
task2 = """Please tell me anything discussed in the file about small cap or russell stocks. 
Please also provide which video or speaker side this
"""

# Read the entire text file into one string
with open(summary_file_name_today_txt) as txt_file:
    all_transcripts = txt_file.read()

# Apply the question to the combined text and show the answer
summary = fetch_videos.apply_task(all_transcripts, client, task2)
print(summary)

The text discussing small cap or Russell stocks is from the article titled "Consumer spending environment remains tepid at best, says CFRA's Arun Sundaram." This article mentions small cap stocks such as Target, Walmart, Home Depot, Lowes, Amazon, Costco, and Macy's. It highlights that big box retailers like Walmart, Target, and Costco are performing well, with a focus on value, newness, and broad product assortment. It also notes that consumer spending remains selective, leaning towards big box retailers, and that promotions and discounts are impacting retail sales positively.


### AI for all transcripts

## Send emails

In [35]:
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import pandas as pd
import os
import fetch_videos

# Sender credentials come from the environment (EMAIL_USER / EMAIL_PASSWORD)
email_user = os.getenv('EMAIL_USER')
email_password = os.getenv('EMAIL_PASSWORD')
# NOTE(review): recipient addresses are hardcoded in the notebook; consider
# moving them to configuration or environment variables.
email_send1 = 'sliu810@gmail.com'
email_send2 = 'zhengwang827@gmail.com'

recipients = [email_send1, email_send2]
# Set up the MIME
message = MIMEMultipart()
message['From'] = email_user
# List every recipient in the To header so it matches the envelope recipients
# passed to sendmail(); previously only the first address appeared here.
message['To'] = ', '.join(recipients)
message['Subject'] = f'summaries_{channel_name}_{fetch_videos.get_formated_date_today()}'

# load df_summaries from disk and get the html content
df_summaries = pd.read_csv(summary_file_name_today_cvs)
html_content = fetch_videos.get_html_content_summary_only(df_summaries)

# Attach the HTML content to the email
message.attach(MIMEText(html_content, 'html'))

# Function to send email
def send_email():
    """Send the prepared ``message`` to ``recipients`` through Outlook SMTP.

    Uses the module-level ``message``, ``recipients``, ``email_user`` and
    ``email_password``. Any failure is reported with a printed message
    rather than raised, so the notebook keeps running.
    """
    try:
        # The context manager guarantees the connection is shut down even if
        # starttls(), login() or sendmail() raises; the previous version only
        # called quit() on the success path and leaked the socket on errors.
        with smtplib.SMTP('smtp-mail.outlook.com', 587) as server:  # Outlook SMTP server
            #server.set_debuglevel(1)  # Enable debugging output
            server.starttls()
            server.login(email_user, email_password)
            text = message.as_string()
            server.sendmail(email_user, recipients, text)
        print("Email sent successfully!")
    except Exception as e:
        print(f"Failed to send email: {e}")

# Send the email
send_email()

Email sent successfully!


# One Off Study

In [36]:
# transcript = fetch_videos.get_transcript('GabBf791bdY')
# print(transcript)
# # Example task
# task2 = """I would like you to summarize the text which describes Tesla FSD.
# Please say in what scenario FSD performed well and what scenario it didn't perform well.
# Format like this:
# Title: 
# Key takeaways:
# * FSD performed well in ...
# * FSD had some challenges in ....
# Improvement over previous version if any.
# """
# task3 = """could you print the transcript with two people speaking in a human-readable format and maintain the original content?"""

# summary = get_summary(transcript, client, task3)
# print(summary)

In [37]:
def get_summary_in_chunks(transcript, client, task, chunk_size=500, summarize=None):
    """
    Generates summaries in chunks to avoid truncation.

    The transcript is split with ``textwrap.wrap`` into pieces of at most
    ``chunk_size`` characters, each piece is summarized separately, and the
    per-chunk summaries are concatenated, one per line.

    Parameters:
    - transcript (str): The full transcript text. Anything that is not a
      string (e.g. None or a NaN read back from a CSV) yields an empty result.
    - client (object): The client to use for generating summaries.
    - task (str): The task/prompt passed along with every chunk.
    - chunk_size (int): The maximum size of each chunk, in characters.
    - summarize (callable, optional): Called as ``summarize(chunk, client, task)``
      and expected to return a string. Defaults to ``fetch_videos.apply_task``.

    Returns:
    - str: Combined summary of all chunks, each followed by a newline
      (empty string when there is nothing to summarize).
    """
    import textwrap

    if summarize is None:
        # The original body called ``get_summary``, which is not defined in
        # this notebook. ``fetch_videos.apply_task`` is the per-text helper
        # used elsewhere here with the same (text, client, task) argument
        # order -- NOTE(review): confirm it is the intended replacement.
        summarize = fetch_videos.apply_task

    # Missing transcripts cannot be wrapped; treat them as "nothing to do".
    if not isinstance(transcript, str):
        return ""

    # Split the transcript into chunks
    chunks = textwrap.wrap(transcript, chunk_size)

    # Summarize each chunk and terminate every summary with a newline
    return "".join(summarize(chunk, client, task) + "\n" for chunk in chunks)

In [38]:
# transcript = fetch_videos.get_transcript('I8JzsnZVylY')
# print(transcript)

In [39]:
# task2 = """I would like you to summarize the text which describes Tesla FSD.
# Please say in what scenario FSD performed well and what scenario it didn't perform well.
# Format like this:
# Title: 
# Key takeaways:
# * FSD performed well in ...
# * FSD had some challenges in ....
# Improvement over previous version if any.
# """
# task3 = """"""

# # summary = fetch_videos.apply_task(transcript, client, task3)
# print(summary)