In [26]:
import pandas as pd
import ast

In [27]:
# Load the three raw inputs from ./datasets.
colors = pd.read_csv('./datasets/colors.csv')
subject = pd.read_csv('./datasets/subject.csv')

# dates.txt is free text, not a real fixed-width table, so read every line
# whole into a single column (label 0).
# Why colspecs is explicit: by default read_fwf infers the column extents from
# the first 100 rows only, and any later line that is longer than the inferred
# width is silently cut off. One open-ended column spec (0 -> end of line)
# avoids that truncation; surrounding whitespace is still stripped.
dates = pd.read_fwf('./datasets/dates.txt',
                    header=None,
                    colspecs=[(0, None)])


In [28]:
# Pull the quoted title and the parenthesised date out of each raw line.
# The extract result has one column per capture group (0 = title, 1 = date).
title_and_date = dates[0].str.extract(r'"(.*?)" \((.*?)\)')
dates['Title'] = title_and_date[0]
dates['Date'] = title_and_date[1]

# Month name is the leading word of the date string.
dates['Month'] = dates['Date'].str.extract(r'(\w+) \d+, \d+', expand=False)

# The raw line (column 0) is no longer needed once it has been split.
dates = dates.drop(columns=[0])

In [29]:
# Drop the stray index column stored in colors.csv.
# errors='ignore' keeps this cell re-runnable: once the column is gone,
# `del colors['Unnamed: 0']` would raise KeyError on a second run.
colors = colors.drop(columns='Unnamed: 0', errors='ignore')

# Give all three frames the same join key name, 'Title'.
# (`dates` already has a 'Title' column, so it needs no rename.)
subject = subject.rename(columns={'TITLE': 'Title'})
colors = colors.rename(columns={'painting_title': 'Title'})

# Normalise the join key values to title case so they compare equal across sources.
dates['Title'] = dates['Title'].str.title()
subject['Title'] = subject['Title'].str.title()
colors['Title'] = colors['Title'].str.title()

# Drop every `subject` column whose name contains 'frame' (case-insensitive).
subject = subject.loc[:, ~subject.columns.str.contains('FRAME', case=False)]

# Title-case the column *names* of `subject` (the cell values are left untouched).
subject = subject.rename(columns=str.title)

# Strip double quotes from the subject titles so they match the other sources.
# regex=False: '"' is a literal character, and the default of `regex` has
# changed between pandas versions.
subject['Title'] = subject['Title'].str.replace('"', '', regex=False)

# Inner-join the three sources on 'Title'.
# NOTE(review): an inner join silently drops any title that is missing from one
# of the sources, and repeated titles would multiply rows - worth checking.
merged = pd.merge(dates, colors, on='Title')
merged = pd.merge(merged, subject, on='Title')

# 'colors' holds the string repr of a list: parse it into a real list and
# remove the '\r\n' sequences embedded in the individual colour names.
merged['colors'] = merged['colors'].apply(
    lambda raw: [name.replace('\r\n', '') for name in ast.literal_eval(raw)]
)

# Drop the 'season' and 'episode' columns.
merged = merged.drop(columns=['season', 'episode'])

# Save to CSV.
# NOTE(review): this also writes the row index as an unnamed first column,
# the same kind of 'Unnamed: 0' column removed from `colors` at the top of this
# cell. Consider index=False if no downstream reader depends on it.
merged.to_csv('bob_rocks.csv')

In [30]:
# Inspect `colors`: as the cell's last expression it is rendered as the cell output.
colors

Unnamed: 0,painting_index,img_src,Title,season,episode,num_colors,youtube_src,colors,color_hex,Black_Gesso,...,Liquid_Clear,Midnight_Black,Phthalo_Blue,Phthalo_Green,Prussian_Blue,Sap_Green,Titanium_White,Van_Dyke_Brown,Yellow_Ochre,Alizarin_Crimson
0,282,https://www.twoinchbrush.com/images/painting28...,A Walk In The Woods,1,1,8,https://www.youtube.com/embed/oh5p5f5_-7A,"['Alizarin Crimson', 'Bright Red', 'Cadmium Ye...","['#4E1500', '#DB0000', '#FFEC00', '#102E3C', '...",0,...,0,0,0,1,1,1,1,1,0,1
1,283,https://www.twoinchbrush.com/images/painting28...,Mt. Mckinley,1,2,8,https://www.youtube.com/embed/RInDWhYceLU,"['Alizarin Crimson', 'Bright Red', 'Cadmium Ye...","['#4E1500', '#DB0000', '#FFEC00', '#102E3C', '...",0,...,0,0,0,1,1,1,1,1,0,1
2,284,https://www.twoinchbrush.com/images/painting28...,Ebony Sunset,1,3,9,https://www.youtube.com/embed/UOziR7PoVco,"['Alizarin Crimson', 'Black Gesso', 'Bright Re...","['#4E1500', '#000000', '#DB0000', '#FFEC00', '...",1,...,0,0,0,1,1,1,1,1,0,1
3,285,https://www.twoinchbrush.com/images/painting28...,Winter Mist,1,4,3,https://www.youtube.com/embed/0pwoixRikn4,"['Prussian Blue', 'Titanium White', 'Van Dyke ...","['#021E44', '#FFFFFF', '#221B15']",0,...,0,0,0,0,1,0,1,1,0,0
4,286,https://www.twoinchbrush.com/images/painting28...,Quiet Stream,1,5,8,https://www.youtube.com/embed/DFSIQNjKRfk,"['Alizarin Crimson', 'Bright Red', 'Cadmium Ye...","['#4E1500', '#DB0000', '#FFEC00', '#102E3C', '...",0,...,0,0,0,1,1,1,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398,407,https://www.twoinchbrush.com/images/painting40...,Evergreen Valley,31,9,14,https://www.youtube.com/embed/mEU0stNfkxI,"['Alizarin Crimson', 'Bright Red', 'Cadmium Ye...","['#4E1500', '#DB0000', '#FFEC00', '#5F2E1F', '...",0,...,0,1,1,1,1,1,1,1,1,1
399,408,https://www.twoinchbrush.com/images/painting40...,Balmy Beach,31,10,13,https://www.youtube.com/embed/kMgd6r6c4vE,"['Alizarin Crimson', 'Black Gesso', 'Bright Re...","['#4E1500', '#000000', '#DB0000', '#FFEC00', '...",1,...,1,1,1,1,0,0,1,1,1,1
400,409,https://www.twoinchbrush.com/images/painting40...,Lake At The Ridge,31,11,12,https://www.youtube.com/embed/8QWvzEQ69Kw,"['Alizarin Crimson', 'Bright Red', 'Cadmium Ye...","['#4E1500', '#DB0000', '#FFEC00', '#5F2E1F', '...",0,...,0,1,1,0,1,1,1,1,1,1
401,410,https://www.twoinchbrush.com/images/painting41...,In The Midst Of Winter,31,12,7,https://www.youtube.com/embed/qx2IsmrCs3c,"['Alizarin Crimson', 'Dark Sienna', 'Midnight ...","['#4E1500', '#5F2E1F', '#000000', '#0C0040', '...",0,...,0,1,1,0,1,0,1,1,0,1
