## Importing Modules To Connect Youtube API 

#### Importing all the modules which are required to pull data from Youtube API which are as follows -:
* Google APi Client
* Pandas
* Json
* CSV


In [6]:
from googleapiclient.discovery import build
import pandas as pd
import json
import csv

## Requesting Data From Youtube Api. 
* Response variable has all the `raw` data.
* All channels related stats are stored in data frame
* We have to find playlist ids for all the channels that is why loop is used to iterate over rows.
* Stored in a variable named playlist_ids
* Finally, return playlist_ids

In [7]:
def get_playlist_ids(youtube,channel_id):
    #requesting data from youtube api
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id = ','.join(channel_id),
    )
    #executing response
    response =request.execute()

    print(response)

    # storing data in all_data list

    all_data = []

    #all important information about the channel

    for item in response['items']:
        data = {'channelName': item['snippet']['title'],
                'subscribers': item['statistics']['subscriberCount'], 
                'views': item['statistics']['viewCount'], 
                'totalViews': item['statistics']['videoCount'],
                'playlistId': item['contentDetails']['relatedPlaylists']['uploads']

        }

    #appending data into list
    all_data.append(data)

    #converting data into a dataframe

    data_frame = pd.DataFrame(all_data)


    # making playlist_id for all the channels

    playlist_id = []

    # extracting playlist_ids of all the channels

    for index, rows in data_frame.iterrows():
        if rows['playlistId'] not in playlist_id: 
            playlist_id.append(rows['playlistId'])


    # return playlist_id


    return playlist_id





## Extracting Video Id's From Playlist Id's. 
* Response variable has all the `raw` data.
* Cleaning data and returning video id as a list
* Next page token is to get all the videos. 

In [8]:
def get_video_ids(youtube, playlist_id):
    video_ids = []

    #requesting data from youtube api

    request =youtube.playlistItems().list(
        part = "snippet, contentDetails", 
        playlistId = ','.join(playlist_id),
    )
    #executing response

    response = request.execute()

    #appending video ids in a list


    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])



    #parsing to next pages with nextpagetoken


    next_page_token = response.get('nextPageToken')

    while next_page_token is not None:
        #requesting data from youtube api

        request = youtube.playlistItems().list(
            part = "snippet, contentDetails", 
            playlistId = ','.join(playlist_id),
            maxResults = 50,
            pageToken = next_page_token
            )
        
        response = request.execute()
        #executing response
        
        next_page_token = response.get('nextPageToken')

        #appending video ids in a list
        
        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])

    return video_ids

In [9]:
def get_video_stats(youtube, video_ids):
    all_video_info = []


    for ids in video_ids:
        request = youtube.videos().list(
            part = "snippet,contentDetails,statistics",
            id = ids
        )
        response = request.execute()

        for video in response["items"]:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                        'statistics':['viewCount','likeCount','commentCount'],
                        'contentDetails':['duration']

            }
            video_info = {}
            video_info['video_id'] = video['id']

            for keys in stats_to_keep.keys():
              for values in stats_to_keep[keys]:
                  try: 
                      video_info[values] = video[keys][values]
                  except: 
                      video_info[values] = None
            
            
            all_video_info.append(video_info)

    data_frame = pd.DataFrame(all_video_info)



    return data_frame


In [10]:
#Driver Code
#get credentials and create api client
api_service_name = "youtube"
api_version = "v3"

#Youtube Api Key

api_key = "Your API Key Here"


#Building Youtube to call the API using API Key
youtube = build(
        api_service_name,api_version,developerKey=api_key
    )


# ALL Channell id that we have to extract data from

channel_id = ["UCcGcC0DCDXyqSPdpskY99CQ", 
                #more channels here
    ]

# getting channel stats

playlist_ids = get_playlist_ids(youtube,channel_id)

# getting video ids for all the videos

video_ids = get_video_ids(youtube,playlist_ids)


video_stats = get_video_stats(youtube,video_ids)

# comments_in_videos = get_comments_in_videos(youtube,video_ids)




{'kind': 'youtube#channelListResponse', 'etag': '79tef8qPnUO0vVEGSJ6ByK2M4dk', 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5}, 'items': [{'kind': 'youtube#channel', 'etag': 'zvagHoPOtwejQsYYo_qHCpeZC94', 'id': 'UCcGcC0DCDXyqSPdpskY99CQ', 'snippet': {'title': 'Deepak Tulsyan', 'description': "Namaste🙏 \nWelcome to my channel. My name is Deepak Tulsyan.\nI'm a Dancer, Choreographer & traveler. Subscribe to my channel if you love watching Dance Videos 😊\n\nFor business enquiry \nMail: deepaktulsyanyt@gmail.com\n.\n📍Delhi, India", 'customUrl': 'deepaktulsyan351', 'publishedAt': '2011-01-29T15:44:29Z', 'thumbnails': {'default': {'url': 'https://yt3.ggpht.com/ytc/AKedOLRvV3wyponDC8qMSR2UqPcyDa9X98MM9ERcwzCu=s88-c-k-c0x00ffffff-no-rj', 'width': 88, 'height': 88}, 'medium': {'url': 'https://yt3.ggpht.com/ytc/AKedOLRvV3wyponDC8qMSR2UqPcyDa9X98MM9ERcwzCu=s240-c-k-c0x00ffffff-no-rj', 'width': 240, 'height': 240}, 'high': {'url': 'https://yt3.ggpht.com/ytc/AKedOLRvV3wyponDC8qMSR2UqPcyDa9X98M

In [11]:
import datetime
# Converting coloumns with numericals into numeric coloumn for data visualization

numeric_cols = ['viewCount', 'likeCount', 'commentCount', 'publishedAt']



video_stats[numeric_cols] = video_stats[numeric_cols].apply(pd.to_numeric, errors= 'coerce',axis = 1)

#Cleaning publishedAt coloumn


# video_stats['publishedAt'] = video_stats['publishedAt'].apply(lambda x: parser.parse(x))

# #Cleaning publishedAt coloumn

# video_stats['publishedAt'] = video_stats['publishedAt'].apply(lambda x: x.strptime("%A"))

In [12]:
# importing isodate to make duration visible
import isodate
#Cleaning duration date
video_stats['durationSecs'] = video_stats['duration'].apply(lambda x: isodate.parse_duration(x))

#converting duration to seconds

video_stats['durationSecs'] = video_stats['durationSecs'].astype('timedelta64[s]')
#counitng tagcount
video_stats['tagCount'] = video_stats['tags'].apply(lambda x:0 if x is None else len(x))


video_stats

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,commentCount,duration,durationSecs,tagCount
0,NS4jp_gyALE,Deepak Tulsyan,Ayeeee 🔥 #teamGM #voodoo #deepaktulsyan,,,,47691.0,2818.0,41.0,PT23S,23.0,0
1,Ylc3HIjuRE4,Deepak Tulsyan,Dancing with Aanya Gupta | Deepak Tulsyan #shorts,,,,117152.0,5130.0,49.0,PT15S,15.0,0
2,qqfO9xnOKpg,Deepak Tulsyan,Check My Fizz | Dancing with @Jacqueline Ferna...,Now Wait is over…..Check out our new video for...,"[karan johar, Check My Fizz, badshah, jacqueli...",,113075.0,5621.0,43.0,PT54S,54.0,5
3,VOkOzPLpDeA,Deepak Tulsyan,Jugnu- Full Class Video | Deepak Tulsyan Chore...,My Merchandise: https://deepaktulsyan.themerch...,"[Deepak, Tulsyan, deepak tulsyan, g m dance ce...",,340967.0,9482.0,347.0,PT5M44S,344.0,5
4,t52B_scWHdg,Deepak Tulsyan,"New Trend | Stop, Drop & Roll…🔥| Deepak Tulsya...",Follow me on Josh app for more exclusive conte...,,,138424.0,4946.0,27.0,PT14S,14.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
259,ArPozq_q_xs,Deepak Tulsyan,How to Dance | Tutorial no. 3 | Sun Saathiya |...,Steps for Complete Beginners! Deepak welcomes ...,"[ABCD 2, The Walt Disney Company (Production C...",,113509.0,1137.0,,PT4M9S,249.0,13
260,_FOm1FMkccg,Deepak Tulsyan,How to dance | Tutorial no. 2 | Basic footwork,This is a 3 steps tutorial video. Watch it cou...,"[Jazz (Musical Genre), Ballet Dancer (Professi...",,84339.0,1322.0,,PT1M17S,77.0,6
261,plbfhxhYIsY,Deepak Tulsyan,Sooraj Duba Hai | Roy | Freestyle Dance Choreo...,For more Videos....\nhttps://www.facebook.com/...,[Dance (Interest)],,10152.0,164.0,,PT38S,38.0,1
262,LsT6BfBZYCw,Deepak Tulsyan,Must Watch | Abhi toh party Suru hui hai | Fre...,For more Videos....\nhttps://www.facebook.com/...,"[Dance (Interest), Kid Cudi (Music Video Perfo...",,11006.0,305.0,,PT1M20S,80.0,5


## Importing Data Visualization Packages
 * Seaborn 
 * Matplotlib



In [13]:
# Data visualization packages
import seaborn as sns
import matplotlib
import matplotlib.ticker as tck
import matplotlib.pyplot as plt



## Exploratory Data Analysis

### Best Performing Videos Graph 

In [None]:
ax = sns.barplot(x='title', y ='viewCount', data = video_stats.sort_values('viewCount', ascending= False)[0:9])
ax.yaxis.set_major_formatter(tck.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x/1000) + 'K'))
plot = ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)