# Test YouTube API

In [None]:
import logging
import os
from youtube_api import YouTubeAPI, YouTubeVideoAPI, YouTubeChannelAPI

# Root logger setup: INFO threshold; each record shows timestamp, logger name, level and message
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Set every logger currently registered with the logging manager to INFO
for logger_name in logging.root.manager.loggerDict:
    logging.getLogger(logger_name).setLevel(logging.INFO)

## Test Video 

In [None]:
import os
import logging
from youtube_api import YouTubeVideoAPI

# Smoke test for YouTubeVideoAPI: fetch the info for one video and save it
# under the local "video_data" directory.
video_api = YouTubeVideoAPI("IsjKZstp-XU")
# video_api = YouTubeVideoAPI("pqh6nFKWB7k", transcript_language='zh')
try:
    video_info = video_api.fetch_video_info()

    # A falsy result is treated as a failed fetch and routed to the handler below.
    if not video_info:
        raise ValueError("Failed to fetch video info")

    # Create a folder to store the video info files
    root_dir = "video_data"
    os.makedirs(root_dir, exist_ok=True)

    # Save video info to file; a falsy return value is reported as a failed save.
    saved_file = video_api.save_video_info_to_file(root_dir)

    if saved_file:
        logging.info(f"Video info saved to: {saved_file}")
    else:
        logging.warning("Failed to save video info")

except Exception as e:
    # logging.exception logs at ERROR level and appends the traceback, so the
    # failing call stays visible in the cell output instead of only str(e).
    logging.exception(f"An error occurred: {e}")

## Test Channel 

In [None]:
import os
import logging
from youtube_api import YouTubeChannelAPI, YouTubeAPI
from datetime import datetime, timedelta

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Initialize the YouTubeChannelAPI class with an existing CSV
channel_name = "lexfridman"
root_dir = "channel_data"
existing_csv = os.path.join(root_dir, "lexfridman_20240903_20240930.csv")  # Update this path
yt_channel_api = YouTubeChannelAPI(channel_name, existing_csv=existing_csv)

# Specify the date range for fetching new videos
end_date = datetime.now()
start_date = end_date - timedelta(days=30)  # Fetch videos from the last 30 days

try:
    # Fetch channel info and new videos
    channel_info, channel_videos = yt_channel_api.fetch_channel_info(start_date, end_date)

    print("\nChannel Info:")
    print(channel_info)

    # Guard the positional lookup: .iloc[0] raises IndexError on an empty frame.
    # Same check as the "Test Channel 2" cell.
    # NOTE(review): assumes channel_videos is a pandas DataFrame (or None) — confirm.
    if channel_videos is not None and not channel_videos.empty:
        print(f"\nTotal videos (including existing and new): {len(channel_videos)}")
        print("\nLatest video:")
        # NOTE(review): presumes the first row is the most recent video — confirm ordering.
        print(channel_videos.iloc[0])
    else:
        print("\nNo videos found in the specified date range.")

    # Update transcripts for new videos
    yt_channel_api.update_video_transcripts()

    # Save updated channel videos to CSV, reusing root_dir so the read and write
    # locations cannot drift apart.
    saved_file = yt_channel_api.save_channel_videos_to_csv(root_dir=root_dir)
    if saved_file:
        print(f"\nUpdated channel videos saved to: {saved_file}")

except Exception as e:
    # logging.exception logs at ERROR level and includes the traceback.
    logging.exception(f"An error occurred: {e}")

In [None]:
# import os
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.chat_models import ChatOpenAI
# from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import FAISS
# from langchain.chains import ConversationalRetrievalChain
# from langchain.memory import ConversationBufferMemory

# # Load OpenAI API key from environment variable
# openai_api_key = os.getenv("OPENAI_API_KEY")
# if not openai_api_key:
#     raise ValueError("OPENAI_API_KEY environment variable is not set")

# def create_chat_engine(text):
#     # Split the text into chunks
#     text_splitter = CharacterTextSplitter(
#         separator="\n",
#         chunk_size=1000,
#         chunk_overlap=200,
#         length_function=len
#     )
#     chunks = text_splitter.split_text(text)
    
#     if not chunks:
#         raise ValueError("No text chunks were created. The input text might be empty.")

#     print(f"Number of chunks: {len(chunks)}")

#     # Create embeddings and vector store
#     embeddings = OpenAIEmbeddings()
#     vectorstore = FAISS.from_texts(chunks, embeddings)

#     # Create the chat engine
#     llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
#     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    
#     chat_engine = ConversationalRetrievalChain.from_llm(
#         llm=llm,
#         retriever=vectorstore.as_retriever(),
#         memory=memory,
#         verbose=True
#     )
    
#     return chat_engine

# # Use the transcript from the previous cell
# transcript = transformed_transcripts.get('en', '')  # Assuming 'en' is the key for English transcript

# if not transcript:
#     raise ValueError("No English transcript found in transformed_transcripts.")

# print(f"Transcript length: {len(transcript)}")
# print(f"First 500 characters of transcript: {transcript[:500]}")

# # Create the chat engine
# try:
#     chat_engine = create_chat_engine(transcript)
# except Exception as e:
#     print(f"An error occurred while creating the chat engine: {str(e)}")
#     raise

# # Function to chat with the model
# def chat_with_transcript(chat_engine, query):
#     try:
#         result = chat_engine({"question": query})
#         return result['answer']
#     except Exception as e:
#         return f"An error occurred while processing the query: {str(e)}"

# # # Example usage
# # query = "Give me a list of historical figures mentioned and the context of the mentioning please?"
# # response = chat_with_transcript(chat_engine, query)
# # print(f"Human: {query}")
# # print(f"AI: {response}")

# # # You can continue asking questions
# # query2 = "What are the expectations for earnings growth?"
# # response2 = chat_with_transcript(chat_engine, query2)
# # print(f"\nHuman: {query2}")
# # print(f"AI: {response2}")

# # Interactive chat loop
# print("\nYou can now chat interactively. Type 'exit' to end the conversation.")
# while True:
#     user_input = input("Human: ")
#     if user_input.lower() == 'exit':
#         break
#     response = chat_with_transcript(chat_engine, user_input)
#     print(f"AI: {response}")

In [2]:
import os
import logging
import json
from youtube_base import YouTubeBase
from video import Video
from utils import sanitize_filename  # Import the sanitize_filename function

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Initialize the YouTubeBase class
youtube_base = YouTubeBase()

# Initialize the Video class
video_id = "IsjKZstp-XU"
# video_id = "pqh6nFKWB7k"  # For Chinese transcript
video = Video(video_id, youtube_base)
# video = Video(video_id, youtube_base, transcript_language='zh')  # For Chinese transcript

# Fetch video info
video_info = video.fetch_video_info()

if not video_info:
    logging.warning("Failed to fetch video info")
else:
    # Create a folder to store the video info files
    root_dir = "video_data"
    os.makedirs(root_dir, exist_ok=True)

    # Build the output file name from the sanitized title and the transcript language.
    title = video_info['Title']
    # NOTE(review): 'Transcript' is presumed to be a (text, language) pair — confirm.
    # `or` also covers a key that is present but None/empty, which the .get default
    # alone does not; the length check avoids IndexError on a short value.
    transcript_entry = video_info.get('Transcript') or (None, None)
    transcript_language = (transcript_entry[1] if len(transcript_entry) > 1 else None) or 'en'
    sanitized_title = sanitize_filename(title, max_length=100)  # Sanitize the title
    file_name = f"{sanitized_title}_{transcript_language}.json"
    file_path = os.path.join(root_dir, file_name)

    # ensure_ascii=False keeps non-ASCII characters readable in the JSON file.
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(video.to_dict(), f, ensure_ascii=False, indent=2)

    logging.info(f"Video info saved to: {file_path}")

# Print video info (runs whether or not the fetch succeeded)
print("Video Information:")
print(json.dumps(video.to_dict(), indent=2, ensure_ascii=False))

2024-10-02 13:35:04,220 - googleapiclient.discovery_cache - INFO - file_cache is only supported with oauth2client<4.0.0


Video Information:
{
  "Video ID": "IsjKZstp-XU",
  "Description": "We’re clearly heading toward war with Iran. Is there any scenario where that’s a good thing for the United States? A definitive answer from Jeffrey Sachs.\n\nWe're going on tour! Get your tickets now: https://tuckercarlson.com/events\n\nSee Donald Trump as you’ve never seen him before in the upcoming docuseries, \"Art of the Surge\" premiering only on TCN. Become a member: https://watchtcn.co/4fuXHQq\n\nIt’s official! We’ve partnered with Old Row to bring you exclusive merch. Shop now at: https://store.tuckercarlson.com\n\nWatch our first conversation with Jeffrey Sachs here: https://youtu.be/JS-3QssVPeg\n\nPaid partnerships:\n\nEight Sleep: Use promo code \"Tucker\" to get $350 off the Pod 4 Ultra at https://EightSleep.com/Tucker\n\nUnplugged: Get $25 off a new phone with code \"Tucker\" at https://Unplugged.com/Tucker\n\n#TuckerCarlson #JeffreySachs #DonaldTrump #Iran #Russia #Ukraine #war #Putin #JoeBiden #nuclearwa

## Test Channel 2

In [2]:
import os
import logging
from channel import ChannelFactory
from datetime import datetime, timedelta

# Root logging configuration: INFO level, timestamped records
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Channel to fetch and the CSV expected to hold previously saved videos
channel_name = "lexfridman"  # lexfridman, dashengmedia
root_dir = "channel_data"
existing_csv = os.path.join(root_dir, "lexfridman_20240903_20240930.csv")

# When the CSV is missing, pass None so the channel is created without prior data
if not os.path.exists(existing_csv):
    logging.warning(f"CSV file not found: {existing_csv}. Starting with empty data.")
    existing_csv = None
else:
    logging.info(f"Loading existing data from {existing_csv}")

# Build the channel object through the factory ("youtube" kind)
yt_channel = ChannelFactory.create_channel(
    "youtube",
    channel_name,
    existing_csv=existing_csv,
    transcript_language='zh',
)

# Fetch window: the 7 days ending now
end_date = datetime.now()
start_date = end_date - timedelta(days=7)

try:
    # Returns the channel metadata and the videos for the window
    channel_info, channel_videos = yt_channel.fetch_channel_info(start_date, end_date)

    print("\nChannel Info:")
    print(channel_info)

    # Report either the empty case or the count plus the first row
    if channel_videos is None or channel_videos.empty:
        print("\nNo videos found in the specified date range.")
    else:
        print(f"\nTotal videos: {len(channel_videos)}")
        print("\nLatest video:")
        print(channel_videos.iloc[0])

    # Refresh transcripts, then write the videos out as CSV under root_dir
    yt_channel.update_video_transcripts()

    saved_file = yt_channel.save_channel_videos_to_csv(root_dir=root_dir)
    if saved_file:
        print(f"\nUpdated channel videos saved to: {saved_file}")

    # Drop the channel's video API objects (per the original note, to free memory)
    yt_channel.clear_video_apis()

except Exception as e:
    # Log the message, then dump the full traceback to the cell output
    logging.error(f"An error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

2024-10-02 13:49:50,986 - googleapiclient.discovery_cache - INFO - file_cache is only supported with oauth2client<4.0.0
2024-10-02 13:49:54,157 - YouTubeChannel - INFO - Fetched 2 new videos.
2024-10-02 13:49:54,159 - YouTubeChannel - INFO - Merged new videos. Total videos: 2
2024-10-02 13:49:54,161 - YouTubeChannel - INFO - Video transcripts have been updated.
2024-10-02 13:49:54,174 - YouTubeChannel - INFO - Successfully saved channel videos to channel_data/lexfridman_20240925_20240930.csv



Channel Info:
{'channel_name': 'lexfridman', 'channel_id': 'UCSHZKyawb77ixDdsGog4iWA', 'description': 'Lex Fridman Podcast and other videos.\n'}

Total videos: 2

Latest video:
Video ID                                                    AzzE7GOvYz8
Title                 Ed Barnhart: Maya, Aztec, Inca, and Lost Civil...
Description           Ed Barnhart is an archaeologist and explorer s...
Published At                                  2024-09-30 12:23:24-05:00
Duration (minutes)                                                  209
Channel ID                                     UCSHZKyawb77ixDdsGog4iWA
Channel Name                                                Lex Fridman
Transcript            (- For the vast majority\nof human existence. ...
Name: 0, dtype: object

Updated channel videos saved to: channel_data/lexfridman_20240925_20240930.csv


In [5]:
import os
import logging
from channel import ChannelFactory
from datetime import datetime, timedelta

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Define channel parameters
root_dir = "channel_data"

# Create a virtual channel
channel_name = "my fav videos"
video_ids = ["oX7OduG1YmI","ds2hr9qSkiU"]

# Create a virtual channel using the ChannelFactory
virtual_channel = ChannelFactory.create_channel(
    "virtual",
    channel_name,
    video_ids=video_ids
)

# Date range passed to fetch_channel_info: the 7 days ending now.
# NOTE(review): earlier comments described this as a wide (10-year) window, but
# the code has always used 7 days; widen `days` if older videos must be included.
end_date = datetime.now()
start_date = end_date - timedelta(days=7)  # Fetch videos from the last 7 days

try:
    # Fetch channel info and videos
    channel_info, channel_videos = virtual_channel.fetch_channel_info(start_date, end_date)

    print("\nVirtual Channel Info:")
    print(f"Channel Name: {channel_info['channel_name']}")
    print(f"Channel ID: {channel_info['channel_id']}")
    print(f"Description: {channel_info.get('description', 'N/A')}")

    if channel_videos is not None and not channel_videos.empty:
        print(f"\nTotal videos: {len(channel_videos)}")
        print("\nVideos in the virtual channel:")
        # Named video_row so the loop does not rebind the notebook-level `video`
        # object created in an earlier cell.
        for _, video_row in channel_videos.iterrows():
            print(f"Title: {video_row.get('Title', 'N/A')}")
            print(f"Published At: {video_row.get('Published At', 'N/A')}")
            print(f"Video ID: {video_row.get('Video ID', 'N/A')}")
            print("---")
    else:
        print("\nNo videos found in the virtual channel.")

    # Update transcripts for videos
    virtual_channel.update_video_transcripts()

    # Save channel videos to CSV
    saved_file = virtual_channel.save_channel_videos_to_csv(root_dir=root_dir)
    if saved_file:
        print(f"\nVirtual channel videos saved to: {saved_file}")

    # Clear video APIs to free up memory
    virtual_channel.clear_video_apis()

except Exception as e:
    # Log the message, then print the full traceback to the cell output.
    logging.error(f"An error occurred: {str(e)}")
    import traceback
    traceback.print_exc()

2024-10-02 13:58:41,324 - googleapiclient.discovery_cache - INFO - file_cache is only supported with oauth2client<4.0.0
2024-10-02 13:58:43,544 - VirtualChannel - INFO - Video transcripts have been updated.
2024-10-02 13:58:43,551 - VirtualChannel - INFO - Successfully saved channel videos to channel_data/my_fav_videos_20240925_20240930.csv



Virtual Channel Info:
Channel Name: my fav videos
Channel ID: virtual
Description: 

Total videos: 2

Videos in the virtual channel:
Title: The Future Mark Zuckerberg Is Trying To Build
Published At: 2024-09-25 12:54:09-05:00
Video ID: oX7OduG1YmI
---
Title: Looking Ahead: The Future of Generative AI – Ben Zhao
Published At: 2024-09-30 13:48:51-05:00
Video ID: ds2hr9qSkiU
---

Virtual channel videos saved to: channel_data/my_fav_videos_20240925_20240930.csv
