## Test JSON Conversion to CSV Process

In [1]:
# Import modules
import os
import pandas as pd
import json

In [None]:
# Load JSON

# Set path where JSON is located
json_file_path = os.path.join("Data", "CA_category_id.json")

# Open and load JSON
with open(json_file_path, "r") as json_file:
    data = json.load(json_file)
data

In [None]:
# Eliminate main metadata/create iterable list of documents
item = data['items']
item

In [None]:
# Create empty lists to hold relevant data in JSON
id = []
channelID = []
title = []
assign = []

# Append data from each document to relevant list
for doc in item:
    id.append(doc['id'])
    channelID.append(doc['snippet']['channelId'])
    title.append(doc['snippet']['title'])
    assign.append(doc['snippet']['assignable'])

In [None]:
# Zip lists to create dataframe
df = pd.DataFrame(list(zip(id, channelID, title, assign)),
                 columns = ['Number', 'Channel ID', 'Title', 'Assignable?'])
df

In [None]:
# Test df export to CSV
country_code = "CA"

output_file_path = os.path.join("Data", f'{country_code}_category_id.csv')

df.to_csv(output_file_path, index=False)

## Convert All JSONs to CSVs

### Create Function

In [2]:
# Create function to convert all JSON files to CSV
def json_conversion(code):
    json_file_path = os.path.join("Data", f'{code}_category_id.json')
    with open(json_file_path, "r") as json_file:
        data = json.load(json_file)
    item = data['items']
    country = []
    id = []
    channelID = []
    title = []
    assign = []
    for doc in item:
        country.append(code)
        id.append(doc['id'])
        channelID.append(doc['snippet']['channelId'])
        title.append(doc['snippet']['title'])
        assign.append(doc['snippet']['assignable'])
    df = pd.DataFrame(list(zip(country, id, channelID, title, assign)),
                 columns = ['country', 'id_number', 'channel_id', 'title', 'assignable'])
    output_file_path = os.path.join("Data", f'{code}_category_id.csv')
    df.to_csv(output_file_path, index=False)

### Call Function to Convert JSONs

In [3]:
# Create list of data file country codes
countries = ["CA", "DE", "FR", "GB", "IN", "JP", "KR", "MX", "RU", "US"]

for code in countries:
    json_conversion(code)

## Prepare Video CSV Files

### Create video_countries Files

In [22]:
countries = ["CA", "DE", "FR", "GB", "IN", "JP", "KR", "MX", "RU", "US"]
d = {}

for code in countries:
    video_file_path = os.path.join("Data", f'{code}videos.csv')
    new_df = pd.read_csv(video_file_path, encoding ='utf_8')
    clean_df = new_df.loc[:, ['video_id', 'title', 'views', 'comment_count', 'trending_date']].copy()
    for row in clean_df:
        clean_df['country_code'] = code
    output_path = os.path.join("Data", f'{code}videos_clean.csv')
    clean_df.to_csv(output_path, index=False)
    d['df' + str(code)] = clean_df.add_suffix('_' + str(code))

d
        
video_countries_df = pd.concat([d['dfCA'], d['dfDE'], d['dfFR'], d['dfGB'], d['dfIN'], d['dfJP'], d['dfKR'], d['dfMX'], d['dfRU'], d['dfUS']])
video_countries_df

Unnamed: 0,video_id_CA,title_CA,views_CA,comment_count_CA,trending_date_CA,country_code_CA,video_id_DE,title_DE,views_DE,comment_count_DE,...,views_RU,comment_count_RU,trending_date_RU,country_code_RU,video_id_US,title_US,views_US,comment_count_US,trending_date_US,country_code_US
0,n1WpP7iowLc,Eminem - Walk On Water (Audio) ft. Beyoncé,17158579.0,125882.0,17.14.11,CA,,,,,...,,,,,,,,,,
1,0dBIkQ4Mz1M,PLUSH - Bad Unboxing Fan Mail,1014651.0,13030.0,17.14.11,CA,,,,,...,,,,,,,,,,
2,5qpjK5DgCt4,"Racist Superman | Rudy Mancuso, King Bach & Le...",3191434.0,8181.0,17.14.11,CA,,,,,...,,,,,,,,,,
3,d380meD0W0M,I Dare You: GOING BALD!?,2095828.0,17518.0,17.14.11,CA,,,,,...,,,,,,,,,,
4,2Vv-BfVoq4g,Ed Sheeran - Perfect (Official Music Video),33523622.0,85067.0,17.14.11,CA,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40944,,,,,,,,,,,...,,,,,BZt0qjTWNhw,The Cat Who Caught the Laser,1685609.0,2657.0,18.14.06,US
40945,,,,,,,,,,,...,,,,,1h7KV2sjUWY,True Facts : Ant Mutualism,1064798.0,3936.0,18.14.06,US
40946,,,,,,,,,,,...,,,,,D6Oy4LfoqsU,I GAVE SAFIYA NYGAARD A PERFECT HAIR MAKEOVER ...,1066451.0,3992.0,18.14.06,US
40947,,,,,,,,,,,...,,,,,oV0zkMe1K8s,How Black Panther Should Have Ended,5660813.0,13088.0,18.14.06,US


In [21]:
d

{'dfCA':       video_id_tCA                                          title_tCA  \
 0      n1WpP7iowLc         Eminem - Walk On Water (Audio) ft. Beyoncé   
 1      0dBIkQ4Mz1M                      PLUSH - Bad Unboxing Fan Mail   
 2      5qpjK5DgCt4  Racist Superman | Rudy Mancuso, King Bach & Le...   
 3      d380meD0W0M                           I Dare You: GOING BALD!?   
 4      2Vv-BfVoq4g        Ed Sheeran - Perfect (Official Music Video)   
 ...            ...                                                ...   
 40876  sGolxsMSGfQ                       HOW2: How to Solve a Mystery   
 40877  8HNuRNi8t70                   Eli Lik Lik Episode 13 Partie 01   
 40878  GWlKEM3m2EE  KINGDOM HEARTS III – SQUARE ENIX E3 SHOWCASE 2...   
 40879  lbMKLzQ4cNQ                   Trump Advisor Grovels To Trudeau   
 40880  POTgw38-m58                   【完整版】遇到恐怖情人該怎麼辦？2018.06.13小明星大跟班   
 
        views_tCA  comment_count_tCA trending_date_tCA country_code_tCA  
 0       17158579           

### Create countries File

In [None]:
country_names = ["Canada", "Germany", "France", "United Kingdom", "India", "Japan", "Korea", "Mexico", "Russia", "United States"]

country_df = pd.DataFrame(list(zip(countries, country_names)), columns=['country_code', 'country_name'])

output_path = os.path.join("Data", "countries.csv")
country_df.to_csv(output_path, index=False)

### Create video File

In [None]:
id = []
vid_title = []

for row in video_countries_df:
    if row['video_id'] not in video_id:
        id.append(row['video_id'])
        vid_title.append(row['title'])