# Create new card set

This notebook takes the output of the Story Engine card creator web app (https://cardcreator.storyenginedeck.com/) and converts it to an easier-to-use CSV format for this application.

In [1]:
import pandas as pd
import numpy as np
import re
## display options
# Raise pandas' display limits for this notebook: up to 4000 rows,
# 100 columns, and 100 characters per cell.
pd.set_option('display.max_rows', 4000)
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_colwidth', 100)

In [2]:
### function that splits data into columns
def split_csv_data(df):
    """Split the raw ``data`` column into a card type and four option columns.

    Each ``data`` value is split on ``;`` at most four times, producing
    ``type`` and ``option1`` ... ``option4``.  Whatever follows the fourth
    separator stays in ``option4``, so leading/trailing semicolons are
    stripped from that column afterwards.

    The frame is modified in place (the ``data`` column is removed) and the
    same object is returned.
    """
    target_cols = ['type', 'option1', 'option2', 'option3', 'option4']
    pieces = df['data'].str.split(';', n=4, expand=True)
    df[target_cols] = pieces
    # the unsplit remainder can still carry extra semicolons; trim them
    df['option4'] = df['option4'].str.strip(';')
    del df['data']
    return df

### makes dataframe/series for each card type
# def make_entry_list(df, val_type):
#     # filter dataframe by type
#     filtered_df = df[df['type'] == val_type]
#     vals = pd.concat([filtered_df['option1'], filtered_df['option2'], filtered_df['option3'], filtered_df['option4']], ignore_index=True)
#     vals.replace(r'^\s*$', 'None', regex=True, inplace=True)
#     # vals.dropna(how='any',inplace=True)
#     return vals

def make_entry_list(df, val_type):
    """Return every option value of one card type as a flat list.

    Rows whose ``type`` equals ``val_type`` are selected, the ``type`` column
    is dropped, and the remaining columns are concatenated one after another
    (all of the first column, then all of the second, and so on).
    """
    matching = df.loc[df['type'] == val_type].drop(columns=['type'])
    stacked = pd.concat(
        [matching[name] for name in matching.columns.to_list()],
        ignore_index=True,
    )
    return stacked.to_list()

def fix_agents(agent):
    """Strip a leading "A"/"An" article from an agent entry and capitalize it.

    Only the articles ``a``/``an`` (any case) followed by whitespace are
    removed, so an entry that has already been cleaned is left unchanged and
    the function can be applied repeatedly without eating real words.
    Leading non-word characters are dropped as well.

    Non-string values (e.g. the NaN pandas uses for an empty cell) are
    returned untouched instead of raising ``TypeError``.

    Note: ``str.capitalize`` upper-cases the first character and lower-cases
    the rest of the entry.
    """
    if not isinstance(agent, str):
        return agent
    stripped = re.sub(r'^\W*(?:an?\s+)?', '', agent, count=1, flags=re.IGNORECASE)
    return stripped.capitalize()

def remove_a(csv_name):
    """Rewrite ``../data/<csv_name>.csv`` with a cleaned ``agent`` column.

    The file is loaded, ``fix_agents`` is applied to every ``agent`` value,
    and the frame is written back to the same path without the index.
    """
    csv_path = f'../data/{csv_name}.csv'
    cards = pd.read_csv(csv_path)
    cards['agent'] = cards['agent'].apply(fix_agents)
    cards.to_csv(csv_path, index=False)

# def create_cleaned_csv(csv_filename, card_set_name):
#     features = ['aspect','agent','engine', 'anchor', 'conflict']
#     card_set = pd.DataFrame()
#     df = pd.read_csv(f'../data/{csv_filename}', names = ['data'])
#     df = split_csv_data(df)
    
#     for feature in features:
#         card_set[feature] = make_entry_list(df, feature)
        
#     card_set.to_csv(f'../data/{card_set_name}.csv', index = False)
#     return

def create_cleaned_csv(csv_filename, card_set_name):
    """Convert a raw card-creator export into a one-column-per-type CSV.

    Reads ``../data/<csv_filename>`` as a single ``data`` column, splits it
    with ``split_csv_data``, gathers the entries of every card type found in
    the ``type`` column with ``make_entry_list``, and writes the result to
    ``../data/<card_set_name>.csv`` without the index.  The per-type lists
    are stacked as rows and transposed, so card types with fewer entries end
    up with missing values at the bottom of their column.

    NOTE(review): the export is read with ``read_csv``'s default comma
    separator -- confirm the raw lines never contain commas.
    """
    raw = pd.read_csv(f'../data/{csv_filename}', names=['data'])
    split = split_csv_data(raw)
    features = split['type'].unique().tolist()

    # one row per card type, then flip so that each type becomes a column
    entries_by_type = [make_entry_list(split, feature) for feature in features]
    card_set = pd.DataFrame(entries_by_type).transpose()
    card_set.columns = features
    card_set.to_csv(f'../data/{card_set_name}.csv', index=False)

### Create Card Sets

In [None]:
# Build the sci-fi card set, then run remove_a over its agent column.
create_cleaned_csv(csv_filename='sci_fi.csv', card_set_name='sci-fi_cards')
remove_a(csv_name='sci-fi_cards')

In [3]:
# Build the politics card set (no agent clean-up is run for this set).
create_cleaned_csv(csv_filename='politics.csv', card_set_name='politics_cards')

In [None]:
# Build the horror card set, then run remove_a over its agent column.
create_cleaned_csv(csv_filename='horror.csv', card_set_name='horror_cards')
remove_a(csv_name='horror_cards')

## Testing for Functions

In [None]:
features = ['agent','aspect','engine', 'anchor', 'conflict']
### read in data
pol = pd.read_csv('../data/politics.csv', names=['data'])

pol = split_csv_data(pol)

# Build all columns first and assemble the frame in one step.  Assigning the
# lists one at a time to an empty DataFrame pins the frame to the length of
# the first feature, so the result depended on the order of `features` and
# fails outright when card types have different numbers of entries.
# A dict of Series lets pandas pad the shorter columns with missing values.
politics = pd.DataFrame({
    feature: pd.Series(make_entry_list(pol, feature), dtype=object)
    for feature in features
})

# fixed output column order for the saved card set
politics = politics[['aspect','agent','engine', 'anchor', 'conflict']]
politics.to_csv(r'../data/politics_cards.csv', index = False)

In [None]:
# politics
# Collect the distinct card types found in the split politics data.
features = pol['type'].unique().tolist()

In [None]:
# Show the current value of `features`.
features

## Write full function to clean a csv

In [None]:
# Manual walk-through of the cleaning steps for the sci-fi export.
csv_filename = 'sci_fi.csv'
card_set_name = 'sci-fi_cards'
features = ['agent', 'aspect', 'engine', 'anchor', 'conflict']
card_set = pd.DataFrame()
# load the raw export as a single 'data' column and split it into type/option columns
df = split_csv_data(pd.read_csv(f'../data/{csv_filename}', names=['data']))
# (filling card_set per feature and writing the CSV are left disabled in this cell)


In [None]:
def create_cleaned_csv(csv_filename, card_set_name):
    """Convert a raw export into a CSV with one column per fixed card type.

    Reads ``../data/<csv_filename>`` as a single ``data`` column, splits it
    with ``split_csv_data``, collects the entries of each card type in
    ``features`` with ``make_entry_list`` and writes them to
    ``../data/<card_set_name>.csv`` without the index.

    The columns are assembled from a dict of Series rather than assigned one
    by one to an empty DataFrame: per-column assignment requires every list
    to have the same length as the first one, which breaks when card types
    have different numbers of entries.  Shorter columns are padded with
    missing values instead.

    Note: this redefines the function of the same name declared earlier in
    the notebook, using a fixed list of card types.
    """
    features = ['aspect','agent','engine', 'anchor', 'conflict']
    df = pd.read_csv(f'../data/{csv_filename}', names = ['data'])
    df = split_csv_data(df)

    entries = {
        feature: pd.Series(make_entry_list(df, feature), dtype=object)
        for feature in features
    }
    # columns=features keeps the fixed column order in the output file
    card_set = pd.DataFrame(entries, columns=features)

    card_set.to_csv(f'../data/{card_set_name}.csv', index = False)
    return

In [None]:

# Try the function on the sci-fi export.
create_cleaned_csv(csv_filename='sci_fi.csv', card_set_name='sci-fi_cards')

### Now read in csv as if we started with it

We need to import the CSV, build the entry list for each card type with the NaNs removed, and then store those entries in a dict keyed by card type for use with the other functions we made.

In [None]:
# Load the cleaned politics card set (one column per card type).
df = pd.read_csv('../data/politics_cards.csv')

In [None]:
# df['anchor']

In [None]:
# df
# Map each card type to its column with the missing values removed.
df_dict = {}
card_types = features
for card_type in card_types:
    # dropna() returns a new Series; dropping in place on a column pulled out
    # of `df` is a chained in-place mutation whose effect on `df` is unreliable
    df_dict[card_type] = df[card_type].dropna(how='any')

In [None]:
# Same mapping built in one expression: card type -> column without missing values.
card_set = {
    card_type: df[card_type].dropna(how='any')
    for card_type in card_types
}

In [None]:
# card_set

In [None]:
def create_card_database(card_set_name):
    """Load a cleaned card set and return its entries grouped by card type.

    Reads ``../data/<card_set_name>.csv`` and returns a dict that maps each
    column name (card type) to that column as a Series with the missing
    values dropped.
    """
    cards = pd.read_csv(f'../data/{card_set_name}.csv')
    return {
        card_type: cards[card_type].dropna(how='any')
        for card_type in cards.columns.to_list()
    }

In [None]:
# create_card_database('sci-fi_cards')

## Get rid of A/An in sci-fi list

In [None]:
import re
# Load the cleaned sci-fi card set to experiment with the agent clean-up.
sf = pd.read_csv('../data/sci-fi_cards.csv')

In [None]:
def fix_agents(agent):
    """Strip a leading "A"/"An" article from an agent entry and capitalize it.

    Only the articles ``a``/``an`` (any case) followed by whitespace are
    removed, so an entry that has already been cleaned is left unchanged and
    the function can be applied repeatedly without eating real words.
    Leading non-word characters are dropped as well.

    Non-string values (e.g. the NaN pandas uses for an empty cell) are
    returned untouched instead of raising ``TypeError``.

    Note: ``str.capitalize`` upper-cases the first character and lower-cases
    the rest of the entry.
    """
    if not isinstance(agent, str):
        return agent
    stripped = re.sub(r'^\W*(?:an?\s+)?', '', agent, count=1, flags=re.IGNORECASE)
    return stripped.capitalize()

def fix_sci_fi():
    """Apply ``fix_agents`` to the sci-fi set's ``agent`` column and save the file back."""
    sci_fi_path = '../data/sci-fi_cards.csv'
    cards = pd.read_csv(sci_fi_path)
    cards['agent'] = cards['agent'].apply(fix_agents)
    cards.to_csv(sci_fi_path, index=False)

In [None]:
# Pick one random agent entry and render it as text without its index label.
text = sf['agent'].sample().to_string(index = False)

# Try the clean-up on the sample: drop leading non-word characters, the first
# word and the non-word characters after it, then capitalize what is left.
re.sub(r'^\W*\w+\W*', '', text).capitalize()

In [None]:
# Run the same substitution over every agent entry.
# NOTE(review): the result is neither stored nor printed, so this loop has no
# visible effect -- presumably a smoke test that the regex runs on every entry.
for agent in sf['agent']:
    text = agent
    re.sub(r'^\W*\w+\W*', '', text).capitalize()

In [None]:
def fix_agents(agent):
    """Strip a leading "A"/"An" article from an agent entry and capitalize it.

    Only the articles ``a``/``an`` (any case) followed by whitespace are
    removed, so an entry that has already been cleaned is left unchanged and
    the function can be applied repeatedly without eating real words.
    Leading non-word characters are dropped as well.

    Non-string values (e.g. the NaN pandas uses for an empty cell) are
    returned untouched instead of raising ``TypeError``.

    Note: ``str.capitalize`` upper-cases the first character and lower-cases
    the rest of the entry.
    """
    if not isinstance(agent, str):
        return agent
    stripped = re.sub(r'^\W*(?:an?\s+)?', '', agent, count=1, flags=re.IGNORECASE)
    return stripped.capitalize()

# Replace the in-memory agent column with its cleaned version.
sf['agent'] = sf['agent'].apply(fix_agents)

In [None]:
# Write the sci-fi set back to its CSV file, without the index column.
sf.to_csv(r'../data/sci-fi_cards.csv', index = False)

In [None]:
# Column names of the sci-fi set as a plain list.
flist = sf.columns.to_list()

In [None]:
# Show the column-name list.
flist

## Fixing csv functions

In [None]:
# Reload the raw politics export and split it into type/option columns.
csv_filename = 'politics.csv'
df = split_csv_data(pd.read_csv(f'../data/{csv_filename}', names=['data']))

In [None]:
# df

In [None]:
### make_entry_list function
def make_entry_list2(df, val_type):
    """Return every option value of one card type as a flat list.

    Rows whose ``type`` equals ``val_type`` are selected, the ``type`` column
    is dropped, and the remaining columns are concatenated one after another
    (all of the first column, then all of the second, and so on).
    """
    matching = df.loc[df['type'] == val_type].drop(columns=['type'])
    stacked = pd.concat(
        [matching[name] for name in matching.columns.to_list()],
        ignore_index=True,
    )
    return stacked.to_list()

In [None]:
# col_df
# col_list

In [None]:
# vals.to_list()

In [None]:
# End-to-end run for the politics set using make_entry_list2.
csv_filename = 'politics.csv'
card_set_name = 'politics_cards'

# load the raw export as one 'data' column and split it into type/option columns
df = split_csv_data(pd.read_csv(f'../data/{csv_filename}', names=['data']))
features = df['type'].unique().tolist()

# one list of entries per card type; stacked as rows, then flipped into columns
col_lists = [make_entry_list2(df, feature) for feature in features]
card_set = pd.DataFrame(col_lists).transpose()
card_set.columns = features
card_set.to_csv(f'../data/{card_set_name}.csv', index=False)

In [None]:
# Rebuild the same table from col_lists: rows become columns, labelled by card type.
df2 = pd.DataFrame(col_lists).T
df2.columns = features

In [None]:
# Show the assembled card set.
card_set