# Categories

In [1]:
from pathlib import Path

import pandas as pd

## Loading data

### Metadata

In [2]:
# Read the story-level metadata from a tab-separated file. Column names are
# supplied here, so the first line of the file is treated as data, not a header.
metadata_df = pd.read_csv(
    'data/src/metadata.tsv',
    names=['story_id', 'story_description', 'category_id', 'story_tags'],
    header=None,
    delimiter='\t',
)

In [3]:
# Preview the first rows to check that the four supplied column names line up with the data.
metadata_df.head()

Unnamed: 0,story_id,story_description,category_id,story_tags
0,5881,Stuff that is not really important. Contains s...,10,corazon stephen dodge-ball volleyball butterfl...
1,14620,18 year old Grace has way more responsibilitie...,10,hilton karen underwear forest molest death run...
2,15577,Can i Have This Dance? Nicki Alab whom lives ...,10,touching newgirl holding joking revolted close...
3,24019,Giselle knew what she wanted. She wanted the b...,19,axel giselle liv dream boyfriend braydon seth ...
4,31450,From the popular anime show Yu Yu Hakusho come...,19,myruki yuyugang kurama spirit youko yuyuhakush...


In [4]:
# (number of rows, number of columns) of the metadata table.
metadata_df.shape

(245851, 4)

### Sample

In [5]:
# Read the chapter sample from a tab-separated file. Column names are
# supplied here, so the first line of the file is treated as data, not a header.
sample_df = pd.read_csv(
    'data/aux/sample.tsv',
    names=['story_id', 'chapter_id', 'chapter_index', 'chapter_text'],
    header=None,
    delimiter='\t',
)

In [6]:
# Preview the first rows to check that the four supplied column names line up with the data.
sample_df.head()

Unnamed: 0,story_id,chapter_id,chapter_index,chapter_text
0,35,183814,0,We caught up on a lot of things that happened ...
1,35,184177,1,They split up to enclose around me. Crap. Wh...
2,35,184771,2,Where's Bing? Why aren't any of your roommate...
3,35,185259,3,Question Vote (it really helps I know) Copyrig...
4,35,186546,4,"I want to marry Miss, I need your blessing. Y..."


In [7]:
# (number of rows, number of columns) of the chapter sample.
sample_df.shape

(1000, 4)

## Modifying data

In [8]:
# Lookup table: numeric category id -> display name of the category.
# Ids 15 and 20 have no entry, so a plain `category_dict[15]` / `category_dict[20]`
# lookup raises KeyError.
category_dict = dict([
    (0, 'Unknown'),
    (1, 'Teen Fiction'),
    (2, 'Poetry'),
    (3, 'Fantasy'),
    (4, 'Romance'),
    (5, 'Science Fiction'),
    (6, 'Fanfiction'),
    (7, 'Humor'),
    (8, 'Mystery / Thriller'),
    (9, 'Horror'),
    (10, 'Classics'),
    (11, 'Adventure'),
    (12, 'Paranormal'),
    (13, 'Spiritual'),
    (14, 'Action'),
    (16, 'Non-Fiction'),
    (17, 'Short Story'),
    (18, 'Vampire'),
    (19, 'Random'),
    (21, 'General Fiction'),
    (22, 'Werewolf'),
    (23, 'Historical Fiction'),
    (24, 'ChickLit'),
])

In [9]:
# Translate each numeric category_id into its display name via category_dict.
# Series.map performs the dictionary lookup in a single pass instead of calling
# a Python lambda once per row.
category_names = metadata_df['category_id'].map(category_dict)

# map() yields NaN for ids that are absent from category_dict. Keep failing
# loudly with KeyError (as a plain dict lookup would), but report every
# offending id at once and before metadata_df is modified.
unmapped_ids = metadata_df.loc[category_names.isna(), 'category_id'].unique().tolist()
if unmapped_ids:
    raise KeyError(f'category_id values missing from category_dict: {unmapped_ids}')

metadata_df['category_name'] = category_names

In [10]:
def _to_tag_set(raw_tags):
    """Return the whitespace-separated tags in ``raw_tags`` as a set of strings.

    * A string is split on whitespace; duplicate tags collapse.
    * A set/frozenset is copied as-is, so re-running this cell after the column
      has already been converted is harmless instead of raising AttributeError.
    * A missing value (the NaN pandas stores for an empty field) becomes an
      empty set.
    * Any other type raises AttributeError, as the plain ``x.split()`` did.
    """
    if isinstance(raw_tags, (set, frozenset)):
        return set(raw_tags)
    if not isinstance(raw_tags, str) and pd.isna(raw_tags):
        return set()
    return set(raw_tags.split())


# Replace the raw space-separated tag string of every story with a set of tags.
metadata_df['story_tags'] = metadata_df['story_tags'].apply(_to_tag_set)

In [11]:
# Re-display the first rows to confirm the new category_name column and the
# set-valued story_tags column.
metadata_df.head()

Unnamed: 0,story_id,story_description,category_id,story_tags,category_name
0,5881,Stuff that is not really important. Contains s...,10,"{evil, sam, hunny, dark, lucas, tangled, myste...",Classics
1,14620,18 year old Grace has way more responsibilitie...,10,"{violence, police, fashion, supernatural, whit...",Classics
2,15577,Can i Have This Dance? Nicki Alab whom lives ...,10,"{phone, newgirl, guy, jokes, mall, games, hous...",Classics
3,24019,Giselle knew what she wanted. She wanted the b...,19,"{liv, axel, summer, giselle, braydon, brother,...",Random
4,31450,From the popular anime show Yu Yu Hakusho come...,19,"{spirit, kurama, yuyugang, youko, yuyuhakusho,...",Random


## Saving data

In [12]:
# Write the enriched metadata (including category_name) as CSV without the
# DataFrame index.
# NOTE(review): story_tags holds Python sets at this point, so each cell is
# written in the set's string form, whose element order is not guaranteed to be
# stable between runs -- confirm that downstream readers expect this format.
metadata_out_path = Path('data/out/metadata.csv')
# to_csv does not create missing directories; make sure data/out exists so the
# notebook also runs on a fresh checkout.
metadata_out_path.parent.mkdir(parents=True, exist_ok=True)
metadata_df.to_csv(metadata_out_path, index=False)

In [13]:
# Write the chapter sample as CSV without the DataFrame index.
sample_out_path = Path('data/out/sample.csv')
# to_csv does not create missing directories; make sure data/out exists so the
# notebook also runs on a fresh checkout.
sample_out_path.parent.mkdir(parents=True, exist_ok=True)
sample_df.to_csv(sample_out_path, index=False)