## Import `pandas` library and load raw dataset

In [1]:
import pandas as pd

# Path of the raw restaurants CSV, relative to the notebook's working directory.
dataset = '../data/TA_restaurants_curated.csv'

# Parse the file with pandas' default CSV options; later cells work on `df`.
df = pd.read_csv(filepath_or_buffer=dataset)

## Clean raw dataset

In [2]:
# Give the first column an explicit name. 'Unnamed: 0' is the label pandas
# assigns to a column whose CSV header cell is empty.
# Rebinding (instead of inplace=True) keeps the cell safe to re-run and chain.
df = df.rename(columns={'Unnamed: 0': 'Col_A'})

# Placeholder used for missing values, per column. Numeric columns get 0;
# text columns get a sentinel string. The bracketed sentinels mirror what
# appears to be a list-literal text format in the raw file — TODO confirm.
na_defaults = {
    'Cuisine Style': "['No Data']",
    'Ranking': 0,
    'Rating': 0,
    'Price Range': "No Data",
    'Number of Reviews': 0,
    'Reviews': "[[], []]",
}
# Indexing each column explicitly keeps a missing column a loud KeyError.
for column, default in na_defaults.items():
    df[column] = df[column].fillna(default)

# Turn the bracketed, quoted 'Cuisine Style' text into a plain comma-separated
# string:
#   1. drop every single quote and square bracket in one pass;
#   2. normalise each separator to exactly ", ". Matching the whitespace
#      after the comma avoids producing a double space when the raw value
#      is already written as "a, b" (a bare "," -> ", " replacement would
#      yield "a,  b").
df['Cuisine Style'] = (
    df['Cuisine Style']
    .replace(r"['\[\]]", "", regex=True)
    .replace(r",\s*", ", ", regex=True)
)

## Transform `Price Range` column values

In [3]:
# Map each raw price symbol to a readable label. `Series.replace` with a dict
# and the default regex=False matches whole cell values only, so "$" does not
# touch "$$ - $$$" or "$$$$"; any other value (e.g. "No Data") is left as is.
price_labels = {
    r"$": r"Bargain",
    r"$$ - $$$": r"Reasonable",
    r"$$$$": r"High-end",
}
df['Price Range'] = df['Price Range'].replace(price_labels)

## Save processed dataset to new csv

In [4]:
# Write the cleaned frame next to the raw file: keep the column header row,
# but leave out the DataFrame index so it does not become an extra column.
df.to_csv(
    '../data/TA_restaurants_curated_processed.csv',
    header=True,
    index=False,
)

## Filter dataset for highest (`5.0`) rating and save to new csv for `best restaurants` dashboard

In [5]:
# Keep only the rows whose rating is exactly 5.0 (the top score).
is_top_rated = df['Rating'].eq(5.0)
df_5 = df[is_top_rated]

# Export the subset for the dashboard: header row kept, index omitted.
df_5.to_csv(
    '../data/TA_restaurants_curated_filtered.csv',
    header=True,
    index=False,
)