In [None]:
pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client




In [None]:
import os
import csv
from googleapiclient.discovery import build

In [None]:
# Read the YouTube Data API key from the environment instead of hardcoding it,
# so the secret is never stored in the saved notebook or in version control.
# Set it before starting Jupyter, e.g. `export YOUTUBE_API_KEY=...`.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '')
if not API_KEY:
    # Surface the misconfiguration here rather than as an opaque HTTP error later.
    print("Warning: YOUTUBE_API_KEY is not set; YouTube API requests will fail.")

In [None]:
# Build the YouTube Data API v3 client, authenticated with the developer key.
# The functions below use this module-level `youtube` object for every request.
youtube = build(serviceName='youtube', version='v3', developerKey=API_KEY)

def search_nike_videos(query="Nike shoes", max_results=100, total_results=600):
    """Search YouTube for videos matching ``query`` and collect their details.

    Pages through ``search.list`` on the module-level ``youtube`` client and
    looks up every hit with ``get_video_details`` until ``total_results``
    records have been collected or there are no further result pages.

    Args:
        query: Search terms sent to the API.
        max_results: Requested page size. It is clamped to the range 1..50
            before being sent, because ``search.list`` documents 50 as the
            largest accepted ``maxResults`` value.
        total_results: Maximum number of video records to return.

    Returns:
        A list of at most ``total_results`` dicts, as produced by
        ``get_video_details``. Videos whose details could not be retrieved
        are skipped.
    """
    # Keep the page size within what the API accepts; a page size of 0 would
    # return no items and never make progress.
    page_size = max(1, min(int(max_results), 50))

    videos = []
    next_page_token = None

    while len(videos) < total_results:
        search_response = youtube.search().list(
            q=query,
            part='id,snippet',
            maxResults=page_size,
            type='video',
            pageToken=next_page_token
        ).execute()

        for item in search_response.get('items', []):
            # Skip any hit that carries no video ID instead of raising KeyError.
            video_id = item.get('id', {}).get('videoId')
            if not video_id:
                continue

            video_info = get_video_details(video_id)
            if video_info:
                videos.append(video_info)
                if len(videos) >= total_results:
                    break  # Enough results collected

        next_page_token = search_response.get('nextPageToken')
        if not next_page_token:
            break  # No more pages to fetch

    return videos

def get_video_details(video_id):
    """Fetch metadata and engagement statistics for a single YouTube video.

    Calls ``videos.list`` on the module-level ``youtube`` client, requesting
    the ``snippet``, ``contentDetails`` and ``statistics`` parts.

    Args:
        video_id: ID of the video to look up.

    Returns:
        A dict with the keys ``Title``, ``Upload Date``, ``Views (Clicks)``,
        ``Likes``, ``Comments``, ``Description``, ``Tags`` and ``URL``, or
        ``None`` when the response contains no items. The three counters are
        always ``int``; a counter absent from the response is reported as 0.
    """
    video_response = youtube.videos().list(
        part='snippet,contentDetails,statistics',
        id=video_id
    ).execute()

    items = video_response.get('items', [])
    if not items:
        return None

    # Only one ID was requested, so the first item is the video we asked for.
    video = items[0]
    snippet = video['snippet']
    statistics = video.get('statistics', {})

    def count(name):
        # Normalise every counter to int so present values (which may arrive
        # as strings) and missing ones share a single type.
        return int(statistics.get(name) or 0)

    return {
        'Title': snippet['title'],
        'Upload Date': snippet['publishedAt'],
        'Views (Clicks)': count('viewCount'),
        'Likes': count('likeCount'),
        'Comments': count('commentCount'),
        'Description': snippet.get('description', ''),
        'Tags': ', '.join(snippet.get('tags', [])),  # Tags as one comma-separated string
        'URL': f"https://www.youtube.com/watch?v={video_id}"
    }

# Collect up to 600 Nike-related videos, following result pages as needed
nike_videos = search_nike_videos(total_results=600)

# Discard any missing (None) records before writing
nike_videos = list(filter(lambda video: video is not None, nike_videos))

# Destination file for the collected video metadata
csv_file = "nike_youtubeAPI_data.csv"

# Write the CSV only when at least one record was collected; the column
# order is taken from the keys of the first record
if not nike_videos:
    print("No data to save.")
else:
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=nike_videos[0].keys())
        writer.writeheader()
        writer.writerows(nike_videos)
    print(f"Data saved to {csv_file}")

Data saved to nike_youtubeAPI_data.csv


In [None]:
pip install textblob



In [None]:
import pandas as pd
from textblob import TextBlob

# Read back the video metadata that the collection step wrote to disk
csv_file = "nike_youtubeAPI_data.csv"
data = pd.read_csv(filepath_or_buffer=csv_file)

def analyze_sentiment(text):
    """Classify the polarity and subjectivity of a piece of text with TextBlob.

    Args:
        text: Value to analyze. Non-string values are converted with ``str``.
            Missing values (``None`` or a float NaN, which is what pandas
            yields for an empty CSV cell) are analyzed as empty text instead
            of as the literal words "None" / "nan".

    Returns:
        A ``(polarity_label, subjectivity_label)`` tuple. The polarity label
        is "Positive" for a score above 0, "Negative" for a score below 0 and
        "Neutral" otherwise. The subjectivity label is "Opinion" for a score
        above 0.5 and "Fact" otherwise.
    """
    # NaN is the only float value that is not equal to itself.
    is_missing = text is None or (isinstance(text, float) and text != text)
    blob = TextBlob('' if is_missing else str(text))

    sentiment = blob.sentiment
    polarity = sentiment.polarity
    subjectivity = sentiment.subjectivity

    # Classify polarity by the sign of the score
    if polarity > 0:
        polarity_label = "Positive"
    elif polarity < 0:
        polarity_label = "Negative"
    else:
        polarity_label = "Neutral"

    # Classify subjectivity against the 0.5 midpoint
    subjectivity_label = "Opinion" if subjectivity > 0.5 else "Fact"

    return polarity_label, subjectivity_label

# Label the Title and Description of every video. Building the label columns
# from a list (rather than unpacking zip(*...)) also works when `data` has no
# rows, where the unpacking form raises ValueError.
for column in ('Title', 'Description'):
    labels = [analyze_sentiment(text) for text in data[column]]
    data[f'{column}_Polarity'] = [polarity for polarity, _ in labels]
    data[f'{column}_Subjectivity'] = [subjectivity for _, subjectivity in labels]

# Save the sentiment data to a new CSV
output_file = "nike_youtubeAPI_sentiment_data.csv"
data.to_csv(output_file, index=False)

print(f"Sentiment analysis results saved to {output_file}")


Sentiment analysis results saved to nike_youtubeAPI_sentiment_data.csv


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Path of a cleaned export of the sentiment dataset. It is not read here: the
# load below is commented out, so this cell operates on the `data` frame that
# is already in memory from the sentiment-analysis step.
file_path = "/mnt/data/nike_youtubeAPI_sentiment_data_cleaned.csv"
#data = pd.read_csv(file_path)

# --- Feature engineering ---
# Parse the upload timestamp, then derive the calendar month and day of month
data['Upload Date'] = pd.to_datetime(data['Upload Date'])
data['Upload Month'] = data['Upload Date'].dt.month
data['Upload Day'] = data['Upload Date'].dt.day

# Encode the sentiment labels as numbers; a label missing from its table maps to NaN
polarity_codes = {'Positive': 1, 'Negative': -1, 'Neutral': 0}
subjectivity_codes = {'Opinion': 1, 'Fact': 0}
label_encodings = [
    ('Title_Polarity', 'Title_Polarity_Numeric', polarity_codes),
    ('Description_Polarity', 'Description_Polarity_Numeric', polarity_codes),
    ('Title_Subjectivity', 'Title_Subjectivity_Numeric', subjectivity_codes),
    ('Description_Subjectivity', 'Description_Subjectivity_Numeric', subjectivity_codes),
]
for source_column, target_column, codes in label_encodings:
    data[target_column] = data[source_column].map(codes)

# --- Model inputs ---
# Predictors: upload timing, engagement counts and the polarity encodings
# (the subjectivity encodings are computed above but not used as predictors)
feature_columns = [
    'Upload Month',
    'Upload Day',
    'Likes',
    'Comments',
    'Title_Polarity_Numeric',
    'Description_Polarity_Numeric',
]
X = data[feature_columns]
y = data['Views (Clicks)']  # Value the model is trained to predict

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Fit and evaluate ---
# Ordinary least-squares fit on the training rows
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# One row per predictor, pairing its name with the fitted coefficient
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_})
print(coefficients)


Mean Squared Error: 29179828318437.996
R-squared: 0.6607292316762493
                        Feature    Coefficient
0                  Upload Month -134625.553621
1                    Upload Day    4491.899630
2                         Likes      24.230332
3                      Comments     458.095327
4        Title_Polarity_Numeric -531595.860890
5  Description_Polarity_Numeric  254438.366009
