In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime


In [6]:
import glob
import os

path = r"...\june" # use your path

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes the
# row order of the combined frame (and of the exported CSV) reproducible.
# os.path.join keeps the pattern construction OS independent.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))

# Fail early with an actionable message; without this check pd.concat would
# raise a generic "No objects to concatenate" error.
if not all_files:
    raise FileNotFoundError(f"No CSV files found in {path!r}")

# Lazily read one DataFrame per CSV file (consumed once by pd.concat below)
df_from_each_file = (pd.read_csv(f) for f in all_files)

# Stack all frames vertically and renumber the rows 0..n-1
concatenated_df   = pd.concat(df_from_each_file, ignore_index=True)

In [7]:
# Fill in missing data for date_scraped.
# The CSVs are read without date parsing, so the existing values are plain
# "YYYY-MM-DD HH:MM:SS" strings. Fill with a string in the same layout rather
# than a datetime object so the column does not end up mixing str and datetime
# values (which breaks sorting, comparisons and .str operations).
concatenated_df["date_scraped"] = concatenated_df["date_scraped"].fillna("2022-06-03 12:00:00")

In [8]:
# Print per-column dtypes and non-null counts for the combined frame;
# memory_usage="deep" asks pandas to inspect object columns for their real size.
concatenated_df.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   title          280 non-null    object
 1   url            280 non-null    object
 2   thumbnail_url  4 non-null      object
 3   channel        280 non-null    object
 4   description    278 non-null    object
 5   views          280 non-null    object
 6   post_date      280 non-null    object
 7   date_scraped   280 non-null    object
dtypes: object(8)
memory usage: 249.4 KB


In [9]:
# Show only the rows whose description is missing
concatenated_df.loc[concatenated_df["description"].isna()]

Unnamed: 0,title,url,thumbnail_url,channel,description,views,post_date,date_scraped
141,How the Celtics Locker Room was after losing t...,https://www.youtube.com/watch?v=qE1s0irr_n8,,RDCworld1,,1.4M views,17 hours ago,2022-06-17 18:30:19
154,How the Celtics Locker Room was after losing t...,https://www.youtube.com/watch?v=qE1s0irr_n8,,RDCworld1,,2M views,1 day ago,2022-06-18 13:21:46


In [10]:
# Share of rows per channel, expressed as a percentage
concatenated_df["channel"].value_counts(normalize=True).mul(100)

NBA                      2.142857
Ryan Trahan              2.142857
HYBE LABELS              2.142857
BANGTANTV                1.785714
America's Got Talent     1.785714
                           ...   
FORMULA 1                0.357143
ARK: Survival Evolved    0.357143
PlayOverwatch            0.357143
Dtay Known               0.357143
AnthonyPadilla           0.357143
Name: channel, Length: 173, dtype: float64

In [11]:
# Peek at the last ten rows of the combined frame
concatenated_df.iloc[-10:]

Unnamed: 0,title,url,thumbnail_url,channel,description,views,post_date,date_scraped
270,"100 Kills = $500,000 Challenge!",https://www.youtube.com/watch?v=qjoz-CAO3xQ,,MrBeast Gaming,Thanks to EA for sponsoring this video! Click ...,3.6M views,22 hours ago,2022-06-30 12:15:21
271,No one can stop me from buying weird ads (not ...,https://www.youtube.com/watch?v=1NEseESJK_4,,Drew Gooden,Sponsored by HelloFresh. Use my code IMALITTLE...,1.2M views,22 hours ago,2022-06-30 12:15:25
272,Gucci Mane - All Dz Chainz (feat. Lil Baby) [O...,https://www.youtube.com/watch?v=p1Tqr-tzTJc,,Gucci Mane,Gucci Mane - All Dz Chainz (feat. Lil Baby) Do...,579K views,1 day ago,2022-06-30 12:15:27
273,Duke Deuce Ft. Glorilla - JUST SAY THAT (Offic...,https://www.youtube.com/watch?v=CeW-Sn3E8Ag,,Duke Deuce,"#NowPlaying Duke Deuce Ft. Glorilla ""JUST SAY ...",2.9M views,13 days ago,2022-06-30 12:15:30
274,I Installed a 1000HP LS Engine in my Ford Ranger,https://www.youtube.com/watch?v=6ARv1H3Ot5I,,Westen Champlin,Get your Redneck Science Clothing ► https://we...,742K views,20 hours ago,2022-06-30 12:15:32
275,j-hope 'MORE' Official Teaser,https://www.youtube.com/watch?v=CDter5sJw9Q,,HYBE LABELS,j-hope 'MORE' Official Teaser Credits: Product...,4.2M views,1 day ago,2022-06-30 12:15:35
276,Busting a 1000 MYTHS in 72 HOURS!,https://www.youtube.com/watch?v=tJaSWjHJvdo,,Keeley,Busting a 1000 MYTHS in 72 HOURS! with Keeley ...,303K views,21 hours ago,2022-06-30 12:15:37
277,Ticket to Paradise | Official Trailer [HD],https://www.youtube.com/watch?v=hkP4tVTdsz8,,Universal Pictures,For better or for worse. George Clooney and Ju...,2.7M views,1 day ago,2022-06-30 12:15:39
278,"I spent a day with NIHACHU: ""Against All Odds""",https://www.youtube.com/watch?v=MNq7Gf6910k,,AnthonyPadilla,I spent a day with Nihachu to learn the truth ...,274K views,18 hours ago,2022-06-30 12:15:40
279,Teaser Trailer | Hocus Pocus 2 | Disney+,https://www.youtube.com/watch?v=ecJRvz9nk-U,,Walt Disney Studios,"This Halloween Season, some legends never die....",2.7M views,2 days ago,2022-06-30 12:15:40


In [12]:
# Drop thumbnail url
# DataFrame.drop returns a new frame and leaves the original untouched, so the
# result must be assigned back; otherwise the column is only missing from the
# displayed output and still ends up in the exported CSV.
# errors="ignore" keeps this cell re-runnable once the column is already gone.
concatenated_df = concatenated_df.drop(columns="thumbnail_url", errors="ignore")
concatenated_df

Unnamed: 0,title,url,channel,description,views,post_date,date_scraped
0,CELTICS at WARRIORS | FULL GAME 1 NBA FINALS H...,https://www.youtube.com/watch?v=FORXBT6GFa0,NBA,"Stay up-to-date on news, live scores and stats...",3.4M views,14 hours ago,2022-06-03 12:00:00
1,Polo G - Distraction (Official Video),https://www.youtube.com/watch?v=ktFLtS8GfoU,Polo G,"Official video for ""Distraction"" by Polo G Pol...",973K views,14 hours ago,2022-06-03 12:00:00
2,"State of Play | June 2, 2022 [ENGLISH]",https://www.youtube.com/watch?v=Wr4PGa1sHFQ,PlayStation,Rated RP – Mature Tune in for nearly 30 minute...,2M views,Streamed 19 hours ago,2022-06-03 12:00:00
3,I Survived On $0.01 For 30 Days - Day 2,https://www.youtube.com/watch?v=6BVMjlWja_8,Ryan Trahan,Day 2. 1 penny. 1 MILLION MEALS BABY! donate h...,2.1M views,1 day ago,2022-06-03 12:00:00
4,Millie Bobby Brown Needs a Milkshake While Eat...,https://www.youtube.com/watch?v=n5Qpg-iZPT8,First We Feast,Millie Bobby Brown is an actress and producer ...,5M views,1 day ago,2022-06-03 12:00:00
...,...,...,...,...,...,...,...
275,j-hope 'MORE' Official Teaser,https://www.youtube.com/watch?v=CDter5sJw9Q,HYBE LABELS,j-hope 'MORE' Official Teaser Credits: Product...,4.2M views,1 day ago,2022-06-30 12:15:35
276,Busting a 1000 MYTHS in 72 HOURS!,https://www.youtube.com/watch?v=tJaSWjHJvdo,Keeley,Busting a 1000 MYTHS in 72 HOURS! with Keeley ...,303K views,21 hours ago,2022-06-30 12:15:37
277,Ticket to Paradise | Official Trailer [HD],https://www.youtube.com/watch?v=hkP4tVTdsz8,Universal Pictures,For better or for worse. George Clooney and Ju...,2.7M views,1 day ago,2022-06-30 12:15:39
278,"I spent a day with NIHACHU: ""Against All Odds""",https://www.youtube.com/watch?v=MNq7Gf6910k,AnthonyPadilla,I spent a day with Nihachu to learn the truth ...,274K views,18 hours ago,2022-06-30 12:15:40


In [14]:
# Write the combined frame to disk as CSV, without the positional index column
concatenated_df.to_csv("june_2022.csv", index=False)