# Youtube Study

In [8]:
!pip install google-api-python-client google-auth google-auth-oauthlib google-auth-httplib2 
!pip install --upgrade google-auth-oauthlib
!pip install openai



In [9]:
# Imports and APIs
import fetch_videos
import openai
import os
import pandas as pd

# Ensure you have set your OpenAI API key
openai.api_key = os.getenv('OPENAI_API_KEY')
client = openai.OpenAI()

## Channel Definitions and Setups

# Daily News Videos

In [10]:
## Specify channel ID and name
channel_id = fetch_videos.channels["CNBC_TV"]  # Change this to the desired channel ID
channel_name = fetch_videos.get_channel_name_by_id(channel_id, fetch_videos.channels)
summary_file_name_today = f'summaries_{channel_name}_{fetch_videos.get_formated_date_today()}'
summary_file_name_today_cvs = f'{summary_file_name_today}.csv'
summary_file_name_today_html = f'{summary_file_name_today}.html'

In [11]:


## Fetch videos
period_type = 'today'  # 'today', 'days', 'weeks', 'months'
number = 1  # The 'today' setting does not use 'number', adjust if using other settings
start_date, end_date = fetch_videos.get_date_range(period_type, number)
df_videos_today = fetch_videos.fetch_videos(start_date, end_date, channel_id,summary_file_name_today_cvs)

# ## Filter Vidoes by time
# # Assume df_videos is your initial DataFrame loaded with video data
# hour_range1 = '0-8'
# hour_range2 = '8-10'
# hour_range3 = '10-12'
# hour_range4 = '12-14'
# hour_range5 = '14-16'
# hour_range6 = '16-24'

# # Filter videos from the last 3 days
# filtered_df = fetch_videos.filter_videos_by_date_and_time(df_videos_today, 'today', 1)
# # fetch_videos.display_df(filtered_df)

## Add transcripts
df_videos_with_transcripts = fetch_videos.add_transcripts_to_df(df_videos_today)
# fetch_videos.display_df(df_videos_with_transcripts,include_video_id=True,include_transcript=True)
df_videos_with_transcripts.to_csv(summary_file_name_today_cvs, index=False)

Appended 4 new videos to summaries_CNBC_TV_2024-08-08.csv.
An error occurred for video ID -rbHIHSRLaI: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=-rbHIHSRLaI! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!


### AI Summary 

In [12]:
## Assign Tasks: Get Summary
task_summary = """I would like you to summarize the transcript with the following instructions: 
First categorize the video content.The category should be one of the following: Crypto, Macro, Politics, Technology, Small Caps or Other. 
Then summarize the stocks that are mentioned in this video.
Then provide key takeaways in a bullet point format.Please make sure don't miss anything about small cap, Nvidia, Tesla, Meta and Macro is mentioned in the transcript.
Please print the summary in a human-readable format like the following: 
Category: Technology
Stock mentioned: STOCK1, STOCK2, STOCK3
Key takeaways:
* takeaway 1
* takeaway 2
* takeaway 3
"""
# load df_videos_with_transcripts from CSV
# df_videos_with_transcripts = pd.read_csv(summary_file_name_today_cvs)

# Get summaries for all transcripts
df_summaries = fetch_videos.apply_tasks_on_all_transcripts(df_videos_with_transcripts, client, task_summary)

# Save summaries to a CSV file
df_summaries.to_csv(summary_file_name_today_cvs, index=False)

In [13]:
## Save summaries to an HTML file 
df_summaries = pd.read_csv(summary_file_name_today_cvs)
html_content = fetch_videos.get_html_content_summary_only(df_summaries)
# print(html_content)

# Save the HTML content to an HTML file
with open(summary_file_name_today_html, 'w') as file:
    file.write(html_content)

In [14]:
# save titles and transcripts to a text file
summary_file_name_today_txt = f'{summary_file_name_today}.txt'
fetch_videos.save_videos_to_text(df_summaries, summary_file_name_today_txt,"Title", "Summary")

In [15]:
task2 = """Please tell me anything discussed in the file about small cap or russell stocks. 
Please also provide which video or speaker side this
"""
with open(summary_file_name_today_txt, 'r') as file:
    all_transcripts = file.read()
summary = fetch_videos.apply_task(all_transcripts, client, task2)
print(summary)

The text discusses small-cap stocks and includes mentions of the Russell stocks category in several areas. Notably, the mentions are found in the discussions led by Moneta's Aoifinn Devitt and the summary of the insights shared by Dan Flax of Neuberger Berman:

1. **Discussion with Aoifinn Devitt:** 
   - The focus is on the performance of small and mid-cap stocks, which are noted to be currently down by 2% for the quarter. However, they are expected to outperform the broader market in profit growth terms.
   - Small and mid-cap stocks are predicted to experience significantly faster profit growth than the S&P 500, with anticipated growth in the high teens to over 20%, contrasting with the S&P 500's expected growth in the low teens.
   - There is an expectation of potential rate cuts which could benefit small and mid-caps more due to their lower interest coverage ratio compared to larger companies.

2. **Dan Flax's Insights:** 
   - Discussion mentions a focus on small and mid-cap stoc

### AI for all transcripts

## Send emails

In [16]:
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import pandas as pd
import os
import fetch_videos

email_user = os.getenv('EMAIL_USER')
email_password = os.getenv('EMAIL_PASSWORD')
email_send1 = 'sliu810@gmail.com'
email_send2 = 'zhengwang827@gmail.com'

recipients = [email_send1, email_send2]
# Set up the MIME
message = MIMEMultipart()
message['From'] = email_user
message['To'] = email_send1
message['Subject'] = f'summaries_{channel_name}_{fetch_videos.get_formated_date_today()}'

# load df_summaries from disk and get the html content
df_summaries = pd.read_csv(summary_file_name_today_cvs)
html_content= fetch_videos.get_html_content_summary_only(df_summaries)

# Attach the HTML content to the email
message.attach(MIMEText(html_content, 'html'))

# Function to send email
def send_email():
    try:
        server = smtplib.SMTP('smtp-mail.outlook.com', 587)  # Outlook SMTP server
        #server.set_debuglevel(1)  # Enable debugging output
        server.starttls()
        server.login(email_user, email_password)
        text = message.as_string()
        server.sendmail(email_user, recipients, text)
        server.quit()
        print("Email sent successfully!")
    except Exception as e:
        print(f"Failed to send email: {e}")

# Send the email
# send_email()

# One Off Study

In [17]:
# transcript = fetch_videos.get_transcript('GabBf791bdY')
# print(transcript)
# # Example task
# task2 = """I would like you to summarize the text which describes Tesla FSD.
# Please say in what senario FSD performed well and waht senario it didn't perform well.
# Format like this:
# Title: 
# Key takeaways:
# * FSD performed well in ...
# * FSD had some challenges in ....
# Improvement over pervious version if any.
# """
# task3 = """could you print the transcript with two person speaking into a human readible format and maintain the original content?"""

# summary = get_summary(transcript, client, task3)
# print(summary)

In [18]:
def get_summary_in_chunks(transcript, client, task, chunk_size=500):
    """
    Generates summaries in chunks to avoid truncation.

    Parameters:
    - transcript (str): The full transcript text.
    - client (object): The client to use for generating summaries.
    - task (str): The task identifier for the summary generation.
    - chunk_size (int): The maximum size of each chunk.

    Returns:
    - str: Combined summary of all chunks.
    """
    import textwrap

    # Split the transcript into chunks
    chunks = textwrap.wrap(transcript, chunk_size)
    full_summary = ""

    for chunk in chunks:
        summary = get_summary(chunk, client, task)
        full_summary += summary + "\n"

    return full_summary

In [19]:
# transcript = fetch_videos.get_transcript('I8JzsnZVylY')
# print(transcript)

In [20]:
task2 = """I would like you to summarize the text which describes Tesla FSD.
Please say in what senario FSD performed well and waht senario it didn't perform well.
Format like this:
Title: 
Key takeaways:
* FSD performed well in ...
* FSD had some challenges in ....
Improvement over pervious version if any.
"""
task3 = """"""

# summary = fetch_videos.apply_task(transcript, client, task3)
print(summary)

The text discusses small-cap stocks and includes mentions of the Russell stocks category in several areas. Notably, the mentions are found in the discussions led by Moneta's Aoifinn Devitt and the summary of the insights shared by Dan Flax of Neuberger Berman:

1. **Discussion with Aoifinn Devitt:** 
   - The focus is on the performance of small and mid-cap stocks, which are noted to be currently down by 2% for the quarter. However, they are expected to outperform the broader market in profit growth terms.
   - Small and mid-cap stocks are predicted to experience significantly faster profit growth than the S&P 500, with anticipated growth in the high teens to over 20%, contrasting with the S&P 500's expected growth in the low teens.
   - There is an expectation of potential rate cuts which could benefit small and mid-caps more due to their lower interest coverage ratio compared to larger companies.

2. **Dan Flax's Insights:** 
   - Discussion mentions a focus on small and mid-cap stoc