# Clean and process content data 

In [1]:
#specify imports
import pandas as pd
import numpy as np
import ast

In [2]:
#load data
df = pd.read_csv("all_content.csv", encoding = "ISO-8859-1", index_col=0)

In [3]:
df.head(5)

Unnamed: 0,category,title,series,episode_name,description,description2,tags,image,more,tags2,publication_date,rating,duration_sec
0,Panel_Discussion,Spicks And Specks,Spicks And Specks,"Series 9 Missy Higgins, Dave O'Neil, Yeo & Jud...","Join Adam Hills, Myf Warhurst and Alan Brough ...",Missy Higgins and Dave O'Neil help Alan battle...,"['ABC TV', 'PANEL & DISCUSSION']",https://cdn.iview.abc.net.au/thumbs/i/le/LE200...,"Hosts Adam Hills, Myf Warhurst, Alan Brough","['abc1', 'australia', 'panel', 'music', 'cultu...",2021-06-20 20:27:00,PG,2701.0
1,Panel_Discussion,Would I Lie To You?,Would I Lie To You?,Series 13 Episode 4,Rob Brydon is back in the host's chair for ano...,Host Rob Brydon and team captains Lee Mack and...,"['ABC TV Plus', 'COMEDY', 'PANEL & DISCUSSION']",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW213...,Host Rob Brydon,"['abc2', 'uk', 'comedy', 'panel', 'entertainme...",2022-02-08 20:30:35,PG,1737.0
2,Panel_Discussion,Whovians,Whovians,Series 3 Episode 9,"Join Rove McManus, Tegan Higginbotham and Bajo...","Rove McManus, Tegan Higginbotham and Bajo are ...","['ABC TV Plus', 'COMEDY', 'PANEL & DISCUSSION']",https://cdn.iview.abc.net.au/thumbs/i/le/LE191...,"Hosts Rove McManus, Tegan Higginbotham, Steven...","['abc2', 'aussie', 'comedy', 'panel', 'enterta...",2020-03-05 21:38:00,PG,2090.0
3,Panel_Discussion,Mock The Week,Mock The Week,Series 20 Episode 13 End of Year Special,"Fast-tracked from the UK, the show combining t...",Dara O'Briain and Hugh Dennis are joined by an...,"['ABC TV Plus', 'COMEDY', 'PANEL & DISCUSSION']",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW279...,"Hosts Dara O'Briain, Hugh Dennis","['abc2', 'uk', 'comedy', 'panel', 'entertainme...",2021-12-30 20:29:58,M,1850.0
4,Panel_Discussion,You Can't Ask That (Simplified Chinese Subtitles),You Can't Ask That (Simplified Chinese Subtitles),Series 5 Episode 5 HIV Positive,Asking the most outrageous and uncomfortable q...,Eight people talk about what it's really like ...,['ABC TV'],https://cdn.iview.abc.net.au/thumbs/i/iv/IV210...,,"['iview', 'docs', 'abc1']",2021-02-05 07:00:00,M,1907.0


In [4]:
df = df.reset_index()

In [5]:
df = df.loc[:,df.columns != "index"]

## Feature engineering 

In [6]:
#category column contains title types and genres
df['category'].unique()

array(['Panel_Discussion', 'Movies', 'News', 'Family', 'Kids', 'Comedy',
       'Documentary', 'Drama', 'Arts', 'Education'], dtype=object)

In [7]:
df_new = df.copy()

In [8]:
#delete instances with no tags
df[df['tags']=="No tags found"]
idx_drop = df[df['tags']=="No tags found"].index

In [10]:
df_new = df_new.drop(idx_drop)

In [11]:
df_new['tags'] = df_new['tags'].apply(lambda x: ast.literal_eval(x))

In [12]:
#find unique tags
count = 0 
tags_unique = []
for index, row in df_new.iterrows():
    tags_row = row['tags']
    for tag in tags_row:
        if tag == "INDIGENOUS":
            count += 1
        if tag not in tags_unique:
            tags_unique.append(tag)

In [13]:
tags_unique

['ABC TV',
 'PANEL & DISCUSSION',
 'ABC TV Plus',
 'COMEDY',
 'ABC NEWS',
 'ARTS & CULTURE',
 'SPORT',
 'INDIGENOUS',
 'SCIENCE',
 'DOCUMENTARY',
 'ABC ARTS',
 'DRAMA',
 'MOVIES',
 'REGIONAL AUSTRALIA',
 'EDUCATION',
 'ABC ME',
 'FAMILY',
 'ABC Kids',
 'LIFESTYLE']

In [16]:
genres = ['ABC TV',
 'PANEL & DISCUSSION',
 'ABC TV Plus',
 'COMEDY',
 'ABC NEWS',
 'ARTS & CULTURE',
 'SPORT',
 'INDIGENOUS',
 'SCIENCE',
 'DOCUMENTARY',
 'ABC ARTS',
 'DRAMA',
 'MOVIES',
 'REGIONAL AUSTRALIA',
 'EDUCATION',
 'ABC ME',
 'FAMILY',
 'ABC Kids',
 'LIFESTYLE']

In [16]:
df_new['category'].unique()

array(['Panel_Discussion', 'Movies', 'News', 'Family', 'Kids', 'Comedy',
       'Documentary', 'Drama', 'Arts', 'Education'], dtype=object)

In [22]:
#create new feature: title type
diff_title_types = ['Documentary', 'Movie', 'News','Panel_discussion','tv_shows']


In [23]:
df_new['title_type'] = df_new['category'].apply(lambda x: 'Documentary' if x == 'Documentary' else('Movie' if x=="Movies" else("Panel_Discussion" if x=="Panel_Discussion" else("News" if x=="News" else 0))))

In [24]:
def check_genre(lst_genres):
    new_genres = []
    diff_genres = ['ABC NEWS','ABC ARTS', 'ABC ME','ABC Kids','SCIENCE','DRAMA','INDIGENOUS','COMEDY','FAMILY','EDUCATION','LIFESTYLE','DOCUMENTARY','ARTS & CULTURE','REGIONAL AUSTRALIA','PANEL & DISCUSSION','SPORT']
    for genre in lst_genres:
        if genre in diff_genres:
            if (genre == 'ABC Kids') | (genre == 'ABC ME'):
                genre = 'KIDS'
            if genre == 'ABC ARTS':
                genre = 'ARTS & CULTURE'
            if genre == 'ABC NEWS':
                genre = 'NEWS'
            if genre not in new_genres:
                new_genres.append(genre)
    return str(new_genres)         

In [25]:
df_new['genre'] = df_new['tags'].apply(lambda x: check_genre(x))

In [37]:
#check if transformation went oke
genres_check = ['NEWS', 'KIDS','SCIENCE','DRAMA','INDIGENOUS','COMEDY','FAMILY','EDUCATION','LIFESTYLE','DOCUMENTARY','ARTS & CULTURE','REGIONAL AUSTRALIA','PANEL & DISCUSSION','SPORT']
diff_genres = []
for index, row in df_new.iterrows():
    genres = ast.literal_eval(row['genre'])
    for genre in genres:
        if genre not in genres_check:
            if genre not in diff_genres:
                diff_genres.append(genre)

#diff_genres is an empty list, hence ok

## Cleaning title type: tv 

In [39]:
#check the 0's - stands for tv-shows
tv = df_new[df_new['title_type'] == 0]

In [40]:
tv['category'].value_counts()

Kids         4298
Education    2529
Family       2056
Comedy       1295
Drama        1174
Arts          912
Name: category, dtype: int64

In [41]:
tv.isna().sum()

category               0
title                  0
series               347
episode_name           3
description            0
description2           1
tags                   0
image                  0
more                7952
tags2                  1
publication_date       1
rating               480
duration_sec           1
title_type             0
genre                  0
dtype: int64

## Clean kids

In [42]:
kids_tv = tv[tv['category'] == "Kids"]
kids_na = kids_tv[kids_tv['series'].isna()]
kids_na.sort_values(by="duration_sec", ascending=False)

Unnamed: 0,category,title,series,episode_name,description,description2,tags,image,more,tags2,publication_date,rating,duration_sec,title_type,genre
3954,Kids,Thomas And Friends: Big World! Big Adventures!...,,Thomas And Friends: Big World! Big Adventures!...,Join Thomas as he embarks on an epic journey a...,Join Thomas as he embarks on an epic journey a...,[ABC Kids],https://cdn.iview.abc.net.au/thumbs/i/zw/ZW137...,,"['abc4kids', '4machines', '4action', '4thomas'...",2022-03-20 01:25:00,G,4885.0,0,['KIDS']
4256,Kids,Play School: The Very Silly Special,,Play School: The Very Silly Special,"Join Michelle, Leah, Matt, Alex and Emma, plus...","Join Michelle, Leah, Matt, Alex and Emma, plus...",[ABC Kids],https://cdn.iview.abc.net.au/thumbs/i/ck/CK204...,,"['abc4kids', 'abc90', 'abc', '90']",2021-07-18 09:06:06,G,2644.0,0,['KIDS']
7689,Kids,Play School: Beginnings and Endings,,Play School: Beginnings and Endings,Play School celebrates new life and reflects o...,Play School celebrates new life and reflects o...,[ABC Kids],https://cdn.iview.abc.net.au/thumbs/i/ck/CK191...,,"['abc4kids', '4upper', '4learn', '4liveaction'...",2021-08-12 06:01:00,G,1814.0,0,['KIDS']
5079,Kids,Play School: Kiya's Excellent eBirthday,,Play School: Kiya's Excellent eBirthday,It's the countdown to Kiya's big birthday part...,It's the countdown to Kiya's big birthday part...,[ABC Kids],https://cdn.iview.abc.net.au/thumbs/i/ck/CK201...,,"['abc4kids', '4lower', '4upper', '4learn', '4l...",2021-04-27 06:00:00,G,1784.0,0,['KIDS']
3907,Kids,"Play School: Ready, Set, Big School",,"Play School: Ready, Set, Big School",Come along with Little Ted and Kiya as they pr...,Come along with Little Ted and Kiya as they pr...,[ABC Kids],https://cdn.iview.abc.net.au/thumbs/i/ck/CK201...,,['abc4kids'],2021-12-08 06:00:30,G,1741.0,0,['KIDS']
6041,Kids,Play School's Let's Eat,,Play School's Let's Eat,You're invited to dine with Play School! A spe...,You're invited to dine with Play School! A spe...,[ABC Kids],https://cdn.iview.abc.net.au/thumbs/i/ck/CK204...,,['abc4kids'],2022-01-10 09:05:27,G,1686.0,0,['KIDS']
4214,Kids,Play School: Acknowledgement of Country,,Play School: Acknowledgement of Country,"Join Luke, Miranda and Hunter to give an Ackno...","Join Luke, Miranda and Hunter to give an Ackno...",[ABC Kids],https://cdn.iview.abc.net.au/thumbs/i/ck/CK191...,,['abc4kids'],2021-07-08 06:00:00,G,1638.0,0,['KIDS']
4175,Kids,Stick Man,,Stick Man,Stick Man lives in the family tree with his St...,Stick Man lives in the family tree with his St...,"[ABC Kids, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW002...,"Director Jeroen Jaspaert, Daniel Snaddon","['abc4kids', 'family-viewing', '4xmas-features...",2020-04-17 11:44:30,G,1626.0,0,"['KIDS', 'FAMILY']"
3949,Kids,Zog,,Zog,"Based on the picture book by Julia Donaldson, ...","Based on the picture book by Julia Donaldson, ...","[ABC Kids, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW124...,"Director Max Lang, Daniel Snaddon","['abc4kids', '4feature', '4featured', 'abcme-s...",2020-04-17 11:45:30,G,1621.0,0,"['KIDS', 'FAMILY']"
8087,Kids,The Gruffalo,,The Gruffalo,Tells the magical tale of a mouse who takes a ...,Tells the magical tale of a mouse who takes a ...,"[ABC Kids, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX058...,"Director Max Lang, Jakob Schuh","['4family', '4upper', '4feature', 'abc4kids', ...",2020-04-17 11:43:30,G,1614.0,0,"['KIDS', 'FAMILY']"


In [43]:
#change values
df_new.loc[3954,'category'] = 'Movie'
df_new.loc[4175,'category'] = 'Movie'
df_new.loc[3949,'category'] = 'Movie'

df_new.loc[8087,'category'] = 'Movie'
df_new.loc[4231,'category'] = 'Movie'
df_new.loc[7098,'category'] = 'Movie'
df_new.loc[6571,'category'] = 'Movie'
df_new.loc[6937,'category'] = 'Movie'
df_new.loc[7578,'category'] = 'Movie'
df_new.loc[6938,'category'] = 'Movie'

## Education 

In [44]:
edu_tv = tv[tv['category'] == "Education"]
edu_na = edu_tv[edu_tv['series'].isna()]
edu_na.sort_values(by="duration_sec", ascending=False)

Unnamed: 0,category,title,series,episode_name,description,description2,tags,image,more,tags2,publication_date,rating,duration_sec,title_type,genre
15011,Education,Montserrat: Living With Volcanoes,,Montserrat: Living With Volcanoes,This program deals with the impacts of the vol...,This program deals with the impacts of the vol...,"[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW113...,,"['abc3', 'education', 'secondary-geography']",2017-08-17 11:00:00,G,2750.0,0,"['KIDS', 'EDUCATION']"
13433,Education,Iceland: Living With Volcanoes,,Iceland: Living With Volcanoes,"Focussing on the 2010 eruption, this film asks...","Focussing on the 2010 eruption, this film asks...","[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW113...,,"['abc3', 'education', 'secondary-geography', '...",2017-07-25 10:00:00,G,2554.0,0,"['KIDS', 'EDUCATION']"
14221,Education,Coastal Processes And Land Forms,,Coastal Processes And Land Forms,What happens when you take away a village's na...,What happens when you take away a village's na...,"[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW113...,,"['abc3', 'education', 'secondary-geography']",2017-08-17 11:00:00,G,2367.0,0,"['KIDS', 'EDUCATION']"
13686,Education,Carbon And Water Cycles In The Rainforest,,Carbon And Water Cycles In The Rainforest,"Filmed in the Amazon and Borneo, this film ill...","Filmed in the Amazon and Borneo, this film ill...","[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW168...,,"['abc3', 'education', 'secondary-geography']",2018-07-24 11:30:00,G,2059.0,0,"['KIDS', 'EDUCATION']"
13862,Education,reIMAGINED: Romeo & Juliet,,reIMAGINED: Romeo & Juliet,27 music students and 8 dancers from schools a...,27 music students and 8 dancers from schools a...,"[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW168...,,"['abc3', 'education', 'secondary-arts']",2018-09-21 11:08:33,G,1757.0,0,"['KIDS', 'EDUCATION']"
13805,Education,Making Media,,Making Media,We go behind the scenes on the creation of a s...,We go behind the scenes on the creation of a s...,"[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX958...,,"['education', 'abc3', 'secondary-arts']",2018-10-15 12:28:00,G,1745.0,0,"['KIDS', 'EDUCATION']"
13043,Education,Mumbai - Inside Dharavi,,Mumbai - Inside Dharavi,Travel inside India's biggest slum and see wha...,Travel inside India's biggest slum and see wha...,"[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW193...,,"['abc3', 'education', 'secondary-geography']",2019-04-30 11:37:00,G,1683.0,0,"['KIDS', 'EDUCATION']"
13078,Education,"Desertification: Causes, Impacts and Management",,"Desertification: Causes, Impacts and Management",Yahya's nomadic family has been forced off the...,Yahya's nomadic family has been forced off the...,"[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW205...,,"['abc3', 'education', 'secondary-geography']",2019-11-11 06:01:00,G,1528.0,0,"['KIDS', 'EDUCATION']"
15006,Education,Life In Medieval Europe,,Life In Medieval Europe,"Set in 1350, this medieval drama unfolds throu...","Set in 1350, this medieval drama unfolds throu...","[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW027...,,"['abc3', 'education', 'secondary-history']",2020-03-16 09:00:00,G,1475.0,0,"['KIDS', 'EDUCATION']"
13664,Education,Hot Deserts: Opportunities and Challenges,,Hot Deserts: Opportunities and Challenges,"Using Morocco's Sahara desert as a case study,...","Using Morocco's Sahara desert as a case study,...","[ABC ME, EDUCATION]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW205...,,"['abc3', 'education', 'secondary-maths', 'seco...",2019-11-11 06:01:00,G,1434.0,0,"['KIDS', 'EDUCATION']"


In [46]:
#change value
df_new.loc[15011,'category'] = 'Movie'

## Family 

In [47]:
fam_tv = tv[tv['category'] == "Family"]
fam_na = fam_tv[fam_tv['series'].isna()]
fam_na.sort_values(by="duration_sec", ascending=False)

Unnamed: 0,category,title,series,episode_name,description,description2,tags,image,more,tags2,publication_date,rating,duration_sec,title_type,genre
2866,Family,Sleeping Beauty,,Sleeping Beauty,The Australian Ballet presents Sleeping Beauty...,The Australian Ballet presents Sleeping Beauty...,"[ABC TV Plus, ARTS & CULTURE, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW229...,,"['abc2', 'arts', 'entertainment', 'concert', '...",2021-06-18 07:00:00,G,7656.0,0,"['ARTS & CULTURE', 'FAMILY']"
2030,Family,Romeo & Juliet,,Romeo & Juliet,"Graeme Murphy's Romeo and Juliet, performed by...","Graeme Murphy's Romeo and Juliet, performed by...","[ABC TV Plus, ABC ARTS, ARTS & CULTURE, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW230...,,"['abc2', 'arts', 'abcarts', 'ballet', 'enterta...",2021-06-18 07:00:00,PG,7168.0,0,"['ARTS & CULTURE', 'FAMILY']"
2881,Family,Cinderella,,Cinderella,Alexei Ratmansky's Cinderella has all the elem...,Alexei Ratmansky's Cinderella has all the elem...,"[ABC TV Plus, ARTS & CULTURE, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW230...,,"['abc2', 'arts', 'entertainment', 'concert', '...",2021-06-18 07:00:00,G,6685.0,0,"['ARTS & CULTURE', 'FAMILY']"
3178,Family,Coppelia,,Coppelia,"A sparkling tale of magic and mischief, Coppel...","A sparkling tale of magic and mischief, Coppel...","[ABC TV Plus, ABC ARTS, ARTS & CULTURE, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW230...,,"['abc2', 'arts', 'abcarts', 'ballet', 'opera',...",2021-06-18 07:00:00,G,6511.0,0,"['ARTS & CULTURE', 'FAMILY']"
3731,Family,The Legend Of The Five,,The Legend Of The Five,When a group of misfit teenagers encounter an ...,When a group of misfit teenagers encounter an ...,"[ABC ME, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW264...,Director Joanne Samuel,"['abc3', 'childrens', '3featured', 'sci-fi', '...",2022-04-01 04:09:33,PG,5773.0,0,"['KIDS', 'FAMILY']"
2875,Family,Legends Of Oz: Dorothy's Return,,Legends Of Oz: Dorothy's Return,An animated musical based on the adventure boo...,An animated musical based on the adventure boo...,"[ABC ME, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW041...,"Director Daniel St. Pierre, Will Finn","['abc3', 'childrens', '3featured', 'abc3-faves...",2022-04-01 20:30:39,PG,5264.0,0,"['KIDS', 'FAMILY']"
3538,Family,Here Comes The Grump,,Here Comes The Grump,An evil wizard presides over the entire world ...,An evil wizard presides over the entire world ...,"[ABC ME, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW275...,,"['abc3', '3featured', 'family-viewing-abcme', ...",2022-03-18 20:31:49,PG,5111.0,0,"['KIDS', 'FAMILY']"
3625,Family,Manou The Swift,,Manou The Swift,The little orphaned swift Manou is adopted by ...,The little orphaned swift Manou is adopted by ...,"[ABC ME, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW299...,,"['abc3', 'family-viewing']",2022-03-25 20:30:01,G,5093.0,0,"['KIDS', 'FAMILY']"
2330,Family,Nowhere Boys: The Book of Shadows,,Nowhere Boys: The Book of Shadows,The Nowhere Boys are drawn together for one fi...,The Nowhere Boys are drawn together for one fi...,"[ABC ME, FAMILY, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/ch/publi...,,"['abc3', '3feature', 'movie', '3family', '3act...",2021-10-17 06:00:00,PG,4875.0,0,"['KIDS', 'FAMILY']"
3338,Family,David Attenborough's Flying Monsters,,David Attenborough's Flying Monsters,"For thousands of years, humans have believed t...","For thousands of years, humans have believed t...","[ABC TV Plus, DOCUMENTARY, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX970...,Host Sir David Attenborough,"['abc2', 'docs', 'nature', 'uk', 'featured', '...",2021-01-01 07:00:00,G,4038.0,0,"['DOCUMENTARY', 'FAMILY']"


## Drama 

In [48]:
drama_tv = tv[tv['category'] == "Drama"]
drama_na = drama_tv[drama_tv['series'].isna()]
drama_na.sort_values(by="duration_sec", ascending=False)

Unnamed: 0,category,title,series,episode_name,description,description2,tags,image,more,tags2,publication_date,rating,duration_sec,title_type,genre
10903,Drama,Funny Girl,,Funny Girl,"The life of the 1930s comedienne Fannie Brice,...","The life of the 1930s comedienne Fannie Brice,...","[ABC TV, ABC TV Plus, COMEDY, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW302...,Director William Wyler,"['abc1', 'abc2', 'biography', 'musical', 'come...",2021-06-25 07:00:00,G,8935.0,0,"['COMEDY', 'DRAMA']"
11004,Drama,Uncle Vanya,,Uncle Vanya,An adaptation of Anton Chekhov's masterpiece -...,An adaptation of Anton Chekhov's masterpiece -...,"[ABC TV Plus, DRAMA]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW273...,"Cast Toby Jones, Rosalind Eleazar, Aimee Lou W...","['abc2', 'drama', 'art', 'theatre', 'period-dr...",2022-02-02 20:32:15,M,8912.0,0,['DRAMA']
11255,Drama,Oliver!,,Oliver!,"Young orphan, Oliver Twist, escapes his poor l...","Young orphan, Oliver Twist, escapes his poor l...","[ABC TV, ABC TV Plus, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW302...,Director Carol Reed,"['abc1', 'abc2', 'drama', 'musical', 'period-d...",2021-06-25 07:00:00,PG,8806.0,0,['DRAMA']
11267,Drama,Primary Colours,,Primary Colours,An all star cast sees Governor Stanton as a pr...,An all star cast sees Governor Stanton as a pr...,"[ABC TV, ABC TV Plus, MOVIES, DRAMA]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW287...,Director Mike Nichols,"['abc1', 'abc2', 'film', 'drama', 'politics', ...",2021-06-01 07:00:00,MA,8203.0,0,['DRAMA']
11477,Drama,On The Road,,On The Road,Aspiring writer Sal Paradise has his world roc...,Aspiring writer Sal Paradise has his world roc...,"[ABC TV, ABC TV Plus, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW291...,Director Walter Salles,"['abc1', 'abc2', 'usa', 'drama', 'adaptation',...",2021-06-01 07:00:00,MA,8014.0,0,['DRAMA']
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10799,Drama,Midsomer Murders: 20th Anniversary Special,,Midsomer Murders: 20th Anniversary Special,"Presented by John Nettles, the original DCI Ba...","Presented by John Nettles, the original DCI Ba...","[ABC TV, DOCUMENTARY, DRAMA]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW298...,Host John Nettles,"['abc1', 'uk', 'docs', 'drama', 'crime-drama',...",2021-08-06 22:00:00,M,3547.0,0,"['DOCUMENTARY', 'DRAMA']"
11200,Drama,All Creatures Great And Small: Christmas Special,,All Creatures Great And Small: Christmas Special,It's Christmas Eve and the day before Helen an...,It's Christmas Eve and the day before Helen an...,"[ABC TV, DRAMA]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW244...,,"['abc1', 'drama', 'period-drama', 'family', 'f...",2022-03-12 20:32:14,PG,3530.0,0,['DRAMA']
11155,Drama,Re-Frame 2020,,Re-Frame 2020,More than 4 million Australians live with a di...,More than 4 million Australians live with a di...,"[ABC TV, ABC ARTS, ARTS & CULTURE, DRAMA, COME...",https://cdn.iview.abc.net.au/thumbs/i/rf/RF200...,Host Bridie McKim,"['abc1', 'disability', 'arts', 'abcarts', 'dra...",2020-11-28 14:30:00,M,3480.0,0,"['ARTS & CULTURE', 'DRAMA', 'COMEDY', 'DOCUMEN..."
11006,Drama,Good Grief,,Good Grief,Good Grief is the story of two friends navigat...,Good Grief is the story of two friends navigat...,"[ABC TV Plus, DRAMA]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW324...,"Cast Sian Clifford, Nikesh Patel","['abc2', 'drama', 'art', 'theatre', 'melodrama...",2022-02-09 20:31:14,MA,2992.0,0,['DRAMA']


## Comedy 

In [49]:
com_tv = tv[tv['category'] == "Comedy"]
com_na = com_tv[com_tv['series'].isna()]
com_na.sort_values(by="duration_sec", ascending=False)

Unnamed: 0,category,title,series,episode_name,description,description2,tags,image,more,tags2,publication_date,rating,duration_sec,title_type,genre
8526,Comedy,Funny Girl,,Funny Girl,"The life of the 1930s comedienne Fannie Brice,...","The life of the 1930s comedienne Fannie Brice,...","[ABC TV, ABC TV Plus, COMEDY, DRAMA, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW302...,Director William Wyler,"['abc1', 'abc2', 'biography', 'musical', 'come...",2021-06-25 07:00:00,G,8935.0,0,"['COMEDY', 'DRAMA']"
9393,Comedy,The Pirates Of Penzance (1994),,The Pirates Of Penzance (1994),This spectacular production of the popular Gil...,This spectacular production of the popular Gil...,"[ABC TV Plus, ABC ARTS, ARTS & CULTURE, COMEDY]",https://cdn.iview.abc.net.au/thumbs/i/ae/AE941...,,"['abc2', 'abcarts', 'australia', 'arts', 'clas...",2021-06-18 07:00:00,G,8865.0,0,"['ARTS & CULTURE', 'COMEDY']"
9232,Comedy,The Producers,,The Producers,"After putting together a Broadway flop, down-o...","After putting together a Broadway flop, down-o...","[ABC TV, ABC TV Plus, COMEDY, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW301...,Director Susan Stroman,"['abc1', 'abc2', 'comedy', 'comedy-drama', 'mu...",2021-06-25 07:00:00,M,7731.0,0,['COMEDY']
8994,Comedy,Paris,,Paris,A series of interwoven stories will have you e...,A series of interwoven stories will have you e...,"[ABC TV, ABC TV Plus, DRAMA, COMEDY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW287...,Director Cedric Klapisch,"['abc1', 'abc2', 'drama', 'comedy', 'romance',...",2021-06-01 07:00:00,M,7433.0,0,"['DRAMA', 'COMEDY']"
8625,Comedy,Dawn French Live: 30 Million Minutes,,Dawn French Live: 30 Million Minutes,"Performing live in London's West End, legendar...","Performing live in London's West End, legendar...","[ABC TV Plus, COMEDY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW173...,Host Dawn French,"['abc2', 'comedy', 'stand-up', 'uk', 'featured...",2022-03-16 03:18:03,M,6980.0,0,['COMEDY']
8983,Comedy,Looking For Eric,,Looking For Eric,"His wife has gone, his stepsons are out of con...","His wife has gone, his stepsons are out of con...","[ABC TV, ABC TV Plus, DRAMA, COMEDY, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW290...,Director Ken Loach,"['abc1', 'abc2', 'uk', 'drama', 'comedy', '200...",2021-06-01 07:00:00,MA,6688.0,0,"['DRAMA', 'COMEDY']"
8439,Comedy,Snow Cake,,Snow Cake,The heart-warming story of an unlikely friends...,The heart-warming story of an unlikely friends...,"[ABC TV, ABC TV Plus, DRAMA, COMEDY, MOVIES]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW287...,Director Marc Evans,"['abc1', 'abc2', 'drama', 'comedy', 'romance',...",2021-06-01 07:00:00,M,6431.0,0,"['DRAMA', 'COMEDY']"
9060,Comedy,Children Of The Revolution,,Children Of The Revolution,Few knew that Stalin spent his last night in t...,Few knew that Stalin spent his last night in t...,"[ABC TV, DRAMA, MOVIES, COMEDY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW303...,"Cast Judy Davis, Richard Roxburgh, Sam Neill, ...","['abc1', 'drama', 'film', 'comedy', 'australia...",2021-10-01 07:00:00,M,6028.0,0,"['DRAMA', 'COMEDY']"
8152,Comedy,California Suite,,California Suite,Neil Simon's laugh-drenched adaptation of his ...,Neil Simon's laugh-drenched adaptation of his ...,"[ABC TV, ABC TV Plus, MOVIES, COMEDY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW301...,Director Herbert Ross,"['abc1', 'abc2', 'film', 'comedy', 'funny', 'q...",2021-06-25 07:00:00,M,5899.0,0,['COMEDY']
8409,Comedy,Love Birds,,Love Birds,Doug is a regular bloke with a broken heart. T...,Doug is a regular bloke with a broken heart. T...,"[ABC TV, ABC TV Plus, COMEDY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW290...,Director Paul Murphy,"['abc1', 'abc2', 'comedy', 'rom-com', 'feel-go...",2021-06-01 07:00:00,PG,5830.0,0,['COMEDY']


## Arts 

In [50]:
arts_tv = tv[tv['category'] == "Arts"]
arts_na = arts_tv[arts_tv['series'].isna()]
arts_na.sort_values(by="duration_sec", ascending=False)[:20]

Unnamed: 0,category,title,series,episode_name,description,description2,tags,image,more,tags2,publication_date,rating,duration_sec,title_type,genre
12608,Arts,The Pirates Of Penzance (1994),,The Pirates Of Penzance (1994),This spectacular production of the popular Gil...,This spectacular production of the popular Gil...,"[ABC TV Plus, ABC ARTS, ARTS & CULTURE, COMEDY]",https://cdn.iview.abc.net.au/thumbs/i/ae/AE941...,,"['abc2', 'abcarts', 'australia', 'arts', 'clas...",2021-06-18 07:00:00,G,8865.0,0,"['ARTS & CULTURE', 'COMEDY']"
11967,Arts,Crowded House Live At Sydney Opera House,,Crowded House Live At Sydney Opera House,An unforgettable event from the steps of Sydne...,An unforgettable event from the steps of Sydne...,"[ABC TV, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/fa/FA160...,"Cast Neil Finn, Nick Seymour, Mitchell Froom, ...","['abc1', 'arts', 'australia', 'music', 'perfor...",2021-06-30 07:00:00,PG,8822.0,0,['ARTS & CULTURE']
12464,Arts,Opera on Sydney Harbour: Carmen,,Opera on Sydney Harbour: Carmen,"Opera Australia performs Bizet's Carmen, again...","Opera Australia performs Bizet's Carmen, again...","[ABC TV, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/zx/ZX954...,Director Cameron Kirkpatrick,"['abc1', 'abcarts', 'arts', 'opera', 'performa...",2021-06-18 07:00:00,PG,8467.0,0,['ARTS & CULTURE']
12272,Arts,Australia Day Live 2022,,Australia Day Live 2022,A concert and fireworks spectacular from Sydne...,A concert and fireworks spectacular from Sydne...,"[ABC TV, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/rv/RV210...,"Host Jeremy Fernandez, Casey Donovan, John For...","['abc1', 'aussie', 'australia', 'event', 'conc...",2022-01-26 21:31:00,PG,7871.0,0,['ARTS & CULTURE']
12239,Arts,Sleeping Beauty,,Sleeping Beauty,The Australian Ballet presents Sleeping Beauty...,The Australian Ballet presents Sleeping Beauty...,"[ABC TV Plus, ARTS & CULTURE, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW229...,,"['abc2', 'arts', 'entertainment', 'concert', '...",2021-06-18 07:00:00,G,7656.0,0,"['ARTS & CULTURE', 'FAMILY']"
12203,Arts,The Importance Of Being Miriam,,The Importance Of Being Miriam,"In this hilarious, heart-warming and thought-p...","In this hilarious, heart-warming and thought-p...","[ABC TV Plus, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW297...,Director Peter J Adams,"['abc2', 'abcarts', 'australia', 'arts', 'biog...",2021-06-18 07:00:00,PG,7213.0,0,['ARTS & CULTURE']
11849,Arts,Romeo & Juliet,,Romeo & Juliet,"Graeme Murphy's Romeo and Juliet, performed by...","Graeme Murphy's Romeo and Juliet, performed by...","[ABC TV Plus, ABC ARTS, ARTS & CULTURE, FAMILY]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW230...,,"['abc2', 'arts', 'abcarts', 'ballet', 'enterta...",2021-06-18 07:00:00,PG,7168.0,0,"['ARTS & CULTURE', 'FAMILY']"
12423,Arts,Opera On Sydney Harbour: La Boheme,,Opera On Sydney Harbour: La Boheme,Experience the romance of the original bohemia...,Experience the romance of the original bohemia...,"[ABC TV Plus, ABC ARTS, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW230...,Director Andy Morton,"['abc2', 'abcarts', 'arts', 'opera', 'performa...",2021-06-18 07:00:00,G,7142.0,0,['ARTS & CULTURE']
11942,Arts,Kylie Minogue Golden: Live in Concert,,Kylie Minogue Golden: Live in Concert,Filmed at various UK venues over the course of...,Filmed at various UK venues over the course of...,"[ABC TV Plus, ARTS & CULTURE]",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW314...,Cast Kylie Minogue,"['abc2', 'arts', 'australia', 'music', 'perfor...",2021-11-12 20:32:04,G,7074.0,0,['ARTS & CULTURE']
11774,Arts,Becoming Jane,,Becoming Jane,"When feisty, 20-year-old, budding novelist, Ja...","When feisty, 20-year-old, budding novelist, Ja...","[ABC TV, ABC TV Plus, DRAMA, ARTS & CULTURE, M...",https://cdn.iview.abc.net.au/thumbs/i/zw/ZW284...,Director Julian Jarrold,"['abc1', 'abc2', 'drama', 'period-drama', '170...",2021-06-01 07:00:00,PG,6915.0,0,"['DRAMA', 'ARTS & CULTURE']"


## Further clean 

In [51]:
df_new['title_type'] = df_new['title_type'].apply(lambda x: x if x != 0 else "tv")

In [52]:
df_new['title_type'].value_counts()

tv                  12264
News                 1172
Documentary          1122
Panel_Discussion      477
Movie                 143
Name: title_type, dtype: int64

## Check empty genres 

In [53]:
#check empty genres
df_check = df_new.copy()
empty_genres = df_check[df_check['genre']== '[]']

In [55]:
pd_input = df_check.iloc[0]['genre']

In [56]:
pd_input

"['PANEL & DISCUSSION']"

In [57]:
idx_pd_genre = empty_genres[empty_genres['title_type']=='Panel_Discussion'].index.to_list()
df_new.loc[idx_pd_genre,'genre'] = pd_input

In [58]:
df_new[df_new['genre']== '[]']['title_type'].value_counts()

tv       20
News     13
Movie     5
Name: title_type, dtype: int64

In [59]:
tv_empty = empty_genres[empty_genres['title_type']=='tv']
idx_tv1_genre = tv_empty[tv_empty['category']=='Drama'].index.to_list()
idx_tv2_genre = tv_empty[tv_empty['category']=='Arts'].index.to_list()
df_new.loc[idx_pd_genre,'genre'] = pd_input

In [60]:
idx_pd_genre = empty_genres[empty_genres['title_type']=='tv'].index.to_list()
df_new.loc[idx_tv2_genre,'genre'] = "['ARTS & CULTURE']"
df_new.loc[idx_tv1_genre,'genre'] = "['DRAMA']"

In [61]:
idx_news_genre = empty_genres[empty_genres['title_type']=='News'].index.to_list()
df_new.loc[idx_news_genre,'genre'] = "['NEWS']"

In [62]:
empty_genres[empty_genres['title_type']=='Movie']
df_new.loc[[515,571,608,548],'genre'] = "['DRAMA']"
df_new.loc[607,'genre'] = "['COMEDY']"

In [63]:
df_new[df_new['genre']== '[]']['title_type'].value_counts()

Series([], Name: title_type, dtype: int64)

In [64]:
def check_for_movie_cat(lst_tags):
    for tag in lst_tags:
        if tag == "MOVIES":
            return True
    return False

In [65]:
#instances with the tag MOVIES should have title type Movie
df_new['title_type'] = df_new[['title_type','tags']].apply(lambda x: 'Movie' if check_for_movie_cat(x['tags']) else x['title_type'], axis=1)

In [66]:
df_new['title_type'].value_counts()

tv                  12118
News                 1172
Documentary          1113
Panel_Discussion      477
Movie                 298
Name: title_type, dtype: int64

## Export data 

In [73]:
df_final = df_new.loc[:,df_new.columns != "category"]
df_final.to_csv("programs_abc.csv", index=False)