1. What is the best time of day to upload a video to YouTube so that it gets into trending?
2. Which day of the week is best for uploading a video to get more views?
3. What kind of content do users prefer to watch?
4. What video duration do viewers prefer to watch?

In [None]:
# Silence every Python warning from this point on so warning text does not clutter cell output.
# NOTE(review): this is a blanket filter with no category/module restriction, so it also
# hides warnings that may point at real problems — consider narrowing it.

import warnings

warnings.filterwarnings('ignore')

In [None]:
# Import the analysis/plotting libraries and read the trending-videos CSV into a dataframe
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# The path is relative to the notebook's working directory.
# NOTE(review): looks like a Kaggle "../input/<dataset>" layout — adjust when running elsewhere.
youtube_df = pd.read_csv('../input/youtube-trending-video-dataset/IN_youtube_trending_data.csv')
# Bare last expression: the notebook renders the dataframe as this cell's output.
youtube_df

In [None]:
# Dimensions of the dataframe as a (rows, columns) tuple
youtube_df.shape

In [None]:
# Column-wise overview of the dataframe: dtype and non-null count for each column
youtube_df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
youtube_df.describe()

In [None]:
# Preview the first 5 rows (head() returns rows, not columns)
youtube_df.head()

In [None]:
# Parse the "publishedAt" strings into pandas datetimes, then re-check the column dtypes.
# The trailing "Z" in the format is matched as a literal character, so the parsed values
# carry no timezone information.
# NOTE(review): "Z" conventionally denotes UTC — confirm before reading hours as local time.
youtube_df["publishedAt"] = pd.to_datetime(
    youtube_df["publishedAt"],
    format="%Y-%m-%dT%H:%M:%SZ",
)
youtube_df.info()

In [None]:
# Count the missing (null) values in each column of youtube_df
youtube_df.isna().sum()

In [None]:
# Time-of-day buckets (minute resolution; seconds are ignored)
# Morning   4:01 AM  to 10:00 AM
# Afternoon 10:01 AM to 4:00 PM
# Evening   4:01 PM  to 10:00 PM
# Night     10:01 PM to 4:00 AM

def getDaytime(x):
    """Return the time-of-day bucket for a timestamp.

    Parameters
    ----------
    x : object exposing ``hour`` and ``minute`` attributes
        (e.g. ``datetime.datetime`` or ``pandas.Timestamp``).

    Returns
    -------
    str
        One of "Morning", "Afternoon", "Evening" or "Night".

    Notes
    -----
    The bucket is derived from the clock ``x`` is expressed in; no timezone
    conversion happens here.
    """
    # Compare minutes since midnight instead of the bare hour, so the boundaries match the
    # table above: 04:30 is "Morning" (not "Night") and 10:30 is "Afternoon" (not "Morning").
    minutes = x.hour * 60 + x.minute
    if 4 * 60 < minutes <= 10 * 60:
        return "Morning"
    if 10 * 60 < minutes <= 16 * 60:
        return "Afternoon"
    if 16 * 60 < minutes <= 22 * 60:
        return "Evening"
    # Everything else wraps around midnight: 22:01 through 04:00.
    return "Night"

# Label every video with the time-of-day bucket of its publish timestamp, then preview.
youtube_df["daytime"] = youtube_df["publishedAt"].apply(getDaytime)
youtube_df.head()

In [None]:
# Per-daytime video count and average likes/dislikes, drawn as three side-by-side bar charts.
youtube_df_grouped = youtube_df.groupby('daytime').agg(
    {'daytime': 'count', 'likes': 'mean', 'dislikes': 'mean'}
)

# One row of three panels; g1/g2/g3 stay bound to the individual axes.
fig, axes = plt.subplots(1, 3, figsize=(20, 7))
g1, g2, g3 = axes

panel_titles = ("Video Count", "Average Likes", "Average Dislikes")
panel_columns = ('daytime', 'likes', 'dislikes')
for ax, title, column in zip(axes, panel_titles, panel_columns):
    sns.barplot(x=youtube_df_grouped.index, y=column, data=youtube_df_grouped, ax=ax)
    # The panel title already says what is plotted, so the axis labels are cleared.
    ax.set(title=title, xlabel=None, ylabel=None)
plt.show()

### Observations
- Most of the videos are released in either the <b>morning</b> or the <b>afternoon</b>.
- In contrast, average likes are higher for videos released in the <b>evening</b> or at <b>night</b>.

Next, compare likes and dislikes:
- day-wise (Sunday, Monday, ...)
- month-wise


In [None]:
#creating weekday column
import datetime

def getweekday(x):
    """Return the English weekday name ("Monday" ... "Sunday") for a date-like value.

    ``x`` must expose ``year``, ``month`` and ``day`` attributes; only the calendar
    date is used, so any time-of-day component is ignored.
    """
    day_names = ("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday")
    calendar_date = datetime.date(x.year, x.month, x.day)
    # isoweekday() numbers Monday as 1 ... Sunday as 7; shift to a 0-based tuple index.
    return day_names[calendar_date.isoweekday() - 1]

# Label every video with the weekday name of its publish date, then preview.
youtube_df["weekday"] = youtube_df["publishedAt"].apply(getweekday)
youtube_df.head()

In [None]:
# Per-weekday video count and average likes/dislikes, drawn side by side (Monday -> Sunday).
from pandas.api.types import CategoricalDtype  # NOTE(review): not referenced in this cell
cats = [ 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# reindex(cats) puts the groups in calendar order instead of groupby's alphabetical order.
youtube_df_grouped = (
    youtube_df.groupby('weekday')
    .agg({'weekday': 'count', 'likes': 'mean', 'dislikes': 'mean'})
    .reindex(cats)
)

# One row of three panels; g1/g2/g3 stay bound to the individual axes.
fig, axes = plt.subplots(1, 3, figsize=(30, 12))
g1, g2, g3 = axes

panel_titles = ("Video Count", "Average Likes", "Average Dislikes")
panel_columns = ('weekday', 'likes', 'dislikes')
for ax, title, column in zip(axes, panel_titles, panel_columns):
    sns.barplot(x=youtube_df_grouped.index, y=column, data=youtube_df_grouped, ax=ax)
    # The panel title already says what is plotted, so the axis labels are cleared.
    ax.set(title=title, xlabel=None, ylabel=None)
    # Tilt the weekday names so they do not overlap.
    ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Helper used to build the "month" column
def getmonth(x):
    """Return the month name for ``x.month``.

    Months 1-11 map to their names; every other value (including 12) yields
    "December", which mirrors a trailing catch-all branch.

    NOTE: "Feburary" is misspelled, but the ``Months`` ordering list used by the
    monthly plotting cell carries the same spelling, so the two must be changed
    together.
    """
    month_names = {
        1: "January",
        2: "Feburary",
        3: "March",
        4: "April",
        5: "May",
        6: "June",
        7: "July",
        8: "August",
        9: "September",
        10: "October",
        11: "November",
    }
    # Anything not listed above falls back to "December".
    return month_names.get(x.month, "December")

# Label every video with the month name of its publish date, then preview.
youtube_df["month"] = youtube_df["publishedAt"].apply(getmonth)
youtube_df.head()

In [None]:
# Per-month video count and average likes/dislikes, bars laid out in calendar order.
# 'Feburary' is spelled exactly as the labels stored in the "month" column.
Months = ['January', 'Feburary', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
youtube_df_grouped = youtube_df.groupby('month').agg(
    {'month': 'count', 'likes': 'mean', 'dislikes': 'mean'}
)

# One row of three panels; g1/g2/g3 stay bound to the individual axes.
fig, axes = plt.subplots(1, 3, figsize=(30, 15))
g1, g2, g3 = axes

panel_titles = ("Video Count", "Average Likes", "Average Dislikes")
panel_columns = ('month', 'likes', 'dislikes')
for ax, title, column in zip(axes, panel_titles, panel_columns):
    # order=Months fixes the bar order; the grouped index itself is alphabetical.
    sns.barplot(x=youtube_df_grouped.index, y=column, order=Months, data=youtube_df_grouped, ax=ax)
    # The panel title already says what is plotted, so the axis labels are cleared.
    ax.set(title=title, xlabel=None, ylabel=None)
    # Tilt the month names so they do not overlap.
    ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# NOTE(review): every line in this cell is commented out, so nothing here executes.
# It is a scratch example of deriving year/month columns with pd.DatetimeIndex.
# df = pd.DataFrame(raw_data, 
#                   index = ['Rutuja', 'Neeraj', 
#                            'Renna', 'Pratik'])
  

# df['year'] = pd.DatetimeIndex(df['birth_date']).year
  

# df['month'] = pd.DatetimeIndex(df['birth_date']).month

In [None]:
# NOTE(review): disabled scratch code — every line is commented out, so nothing here executes.
# youtube_df_grouped['publishedAt'] = pd.to_datetime(youtube_df_grouped['publishedAt'],format='%Y%m%d')
# youtube_df_grouped['publishedAt'] = pd.DatetimeIndex(youtube_df_grouped['publishedAt']).year
# youtube_df_grouped['publishedAt'] = pd.DatetimeIndex(youtube_df_grouped['publishedAt']).month