# Data Cleaning

## References
* https://www.twilio.com/blog/2017/02/an-easy-way-to-read-and-write-to-a-google-spreadsheet-in-python.html
* https://stackoverflow.com/questions/42063716/pandas-sum-up-multiple-columns-into-one-column-without-last-column

In [32]:
# Dependencies
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import numpy as np
import datetime as dt

## Pull data from Google Sheets

In [33]:
# Interact with Google Drive API
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json', scope)
client = gspread.authorize(creds)

In [34]:
# See all worksheets
all_sheets = client.open('Wardrobe Tracking')
list_sheets = all_sheets.worksheets()
print(list_sheets)

[<Worksheet 'Tops' id:0>, <Worksheet 'Outerwear' id:1224580649>, <Worksheet 'Bottoms' id:611649355>, <Worksheet 'Dresses' id:1069008450>, <Worksheet 'Shoes' id:869863122>, <Worksheet '2020 Changes' id:12959625>, <Worksheet '2019 Changes' id:684059692>, <Worksheet '30 Wears' id:81161752>]


In [35]:
# Access and store data from sheet named tops
tops = client.open('Wardrobe Tracking').sheet1
tops_df = pd.DataFrame(tops.get_all_records())

# Standardize columns, column names, and remove dollar signs 
tops_df = tops_df.rename(columns={'Sub-Category': 'Subcategory - Sleeves'})
tops_df['Subcategory - Length'] = np.nan
tops_df['Cost'] = tops_df['Cost'].str.replace('$', '')
tops_df['Repairs/Tailoring'] = tops_df['Repairs/Tailoring'].str.replace('$', '')
tops_df['Type'] = 'Tops'

# View dataframe
tops_df.head()

Unnamed: 0,Wardrobe Tracking - Start Date 12/9/2018,Wears (11/2020),Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),...,Notes,Repairs/Tailoring,CPW,Category,Subcategory - Sleeves,Color,Today,Start Date,Subcategory - Length,Type
0,Brian's White Buttonup LS Shirt,,2,1.0,2.0,,,,,,...,,,$0.00,shirt,long sleeve,cream/white/tan,11/11/2020,12/9/2018,,Tops
1,Cream Turtleneck LS Shirt,,1,1.0,,,STORAGE,1.0,1.0,,...,,,$0.35,shirt,long sleeve,cream/white/tan,11/11/2020,12/9/2018,,Tops
2,Tie Dye Mineo LS Shirt,,1,,,,,,2.0,,...,Winter Jam,,$7.00,shirt,long sleeve,multi,11/11/2020,12/9/2018,,Tops
3,Forest Green Silk LS Shirt,,2,,,,,1.0,,,...,,,$2.30,shirt,long sleeve,green,11/11/2020,12/9/2018,,Tops
4,Grey Heather LS Shirt,,1,,1.0,1.0,,2.0,1.0,1.0,...,,,$0.65,shirt,long sleeve,grey,11/11/2020,12/9/2018,,Tops


In [36]:
# Access and store data from sheet named tops
outerwear = client.open('Wardrobe Tracking').worksheet('Outerwear')
outerwear_df = pd.DataFrame(outerwear.get_all_records())

# Standardize columns, column names and remove dollar signs 
outerwear_df = outerwear_df.rename(columns={'Sub-Category': 'Subcategory - Sleeves', 
                                            'Wears 10/2020': 'Wears (10/2020)',
                                           'Wears 9/2020': 'Wears (9/2020)'})
outerwear_df['Subcategory - Length'] = np.nan
outerwear_df['Cost'] = outerwear_df['Cost'].str.replace('$', '')
outerwear_df['Repairs/Tailoring'] = outerwear_df['Repairs/Tailoring'].str.replace('$', '')
outerwear_df['Type'] = 'Outerwear'

# View dataframe
outerwear_df.head()

Unnamed: 0,Wardrobe Tracking - Start Date 12/9/2018,Wears (11/2020),Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),...,Source,Notes,Repairs/Tailoring,CPW,Category,Subcategory - Sleeves,Today,Start Date,Subcategory - Length,Type
0,Blue Pullover Sweater,,,,,,,,1.0,,...,Hand-me-down,,,$0.00,sweater,long sleeve,11/11/2020,12/9/2018,,Outerwear
1,Beige Cable Knit Sweater,3.0,2.0,,,,,,,,...,Secondhand,,,$2.00,sweater,long sleeve,11/11/2020,12/9/2018,,Outerwear
2,Cream Knit Crop Sweater,1.0,3.0,1.0,,,,,,,...,,,,$0.83,sweater,long sleeve,11/11/2020,12/9/2018,,Outerwear
3,Purple and Orange Mockneck Sweater,1.0,5.0,,,,,,,,...,Secondhand,,,$1.67,sweater,long sleeve,11/11/2020,12/9/2018,,Outerwear
4,Cream Cardigan,,,1.0,,,,2.0,2.0,4.0,...,Slow Fashion,,,$1.95,cardigan,long sleeve,11/11/2020,12/9/2018,,Outerwear


In [37]:
# Access and store data from sheet named tops
bottoms = client.open('Wardrobe Tracking').worksheet('Bottoms')
bottoms_df = pd.DataFrame(bottoms.get_all_records())

# Standardize columns, column names and remove dollar signs
bottoms_df = bottoms_df.rename(columns={'Sub-Category': 'Subcategory - Length', 
                                            'Wears 10/2020': 'Wears (10/2020)',
                                           'Wears 9/2020': 'Wears (9/2020)'})
bottoms_df['Subcategory - Sleeves'] = np.nan
bottoms_df['Cost'] = bottoms_df['Cost'].str.replace('$', '')
bottoms_df['Repairs/Tailoring'] = bottoms_df['Repairs/Tailoring'].str.replace('$', '')
bottoms_df['Type'] = 'Bottoms'

# View dataframe
bottoms_df.head()

Unnamed: 0,Wardrobe Tracking - Start Date 12/9/2018,Wears (11/2020),Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),...,Source,Notes,Repairs/Tailoring,CPW,Category,Subcategory - Length,Today,Date Started,Subcategory - Sleeves,Type
0,Curator Rust CL Skirt,1.0,,3.0,,1.0,1.0,3.0,1.0,1.0,...,Secondhand,,7.0,$3.90,skirt,calf-length maxi,11/11/2020,12/9/2018,,Bottoms
1,Black Midi CL Circle Skirt,,,,,,,,1.0,3.0,...,Gift,,,$0.00,skirt,calf-length maxi,11/11/2020,12/9/2018,,Bottoms
2,Grey FL Maxi Skirt,,1.0,1.0,,,,,2.0,,...,,,,$2.00,skirt,full-length maxi,11/11/2020,12/9/2018,,Bottoms
3,Pink and Tan Pattern Maxi Skirt,,,,2.0,,,,,1.0,...,Gift,,,$0.00,skirt,full-length maxi,11/11/2020,12/9/2018,,Bottoms
4,Light Black Skinny Leg Jeans,,1.0,,,,3.0,7.0,2.0,12.0,...,New,,,$3.11,jeans,skinny leg,11/11/2020,12/9/2018,,Bottoms


In [38]:
# Access and store data from sheet named tops
dresses = client.open('Wardrobe Tracking').worksheet('Dresses')
dresses_df = pd.DataFrame(dresses.get_all_records())

# Standardize columns, column names and data types
dresses_df = dresses_df.rename(columns={'Dress Sleeve Length': 'Subcategory - Sleeves', 
                                        'Dress Length': 'Subcategory - Length',
                                       'Wears 10/2020': 'Wears (10/2020)'})
dresses_df['Cost'] = dresses_df['Cost'].str.replace('$', '')
dresses_df['Repairs/Tailoring'] = dresses_df['Repairs/Tailoring'].str.replace('$', '')
dresses_df['Type'] = 'Dresses'

# View dataframe
dresses_df.head()

Unnamed: 0,Wardrobe Tracking - Start Date 12/9/2018,Wears (11/2020),Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),...,Notes,Repairs/Tailoring,CPW,Unnamed: 15,Category,Subcategory - Length,Subcategory - Sleeves,Today,Date Started,Type
0,Blue and Yellow Jumpsuit,,1.0,1.0,3.0,,1.0,,,,...,,,$15.83,,jumpsuit,full length,sleeveless,11/11/2020,12/9/2018,Dresses
1,White Slip SL KL Dress,,,1.0,1.0,,1.0,1.0,1.0,,...,,,$4.89,,dress,knee length,sleeveless,11/11/2020,12/9/2018,Dresses
2,Blue and White Polka Dot SL KL Dress,,,,,1.0,,,1.0,,...,,,$0.00,FL = Full Length,dress,knee length,sleeveless,11/11/2020,12/9/2018,Dresses
3,"Multicolor (orange, white, blue) SL KL Dress",,,,1.0,,,,1.0,,...,,,$6.25,,dress,knee length,sleeveless,11/11/2020,12/9/2018,Dresses
4,Black High-low SL KL Dress,,,,1.0,,,1.0,1.0,3.0,...,,,$0.00,,dress,knee length,sleeveless,11/11/2020,12/9/2018,Dresses


In [39]:
# Access and store data from sheet named tops
shoes = client.open('Wardrobe Tracking').worksheet('Shoes')
shoes_df = pd.DataFrame(shoes.get_all_records())

# Standardize columns, column names and remove dollar signs
shoes_df = shoes_df.rename(columns={'Repairs': 'Repairs/Tailoring', 'Wears 10/2020': 'Wears (10/2020)'})
shoes_df['Subcategory - Sleeves'] = np.nan
shoes_df['Subcategory - Length'] = np.nan
shoes_df['Cost'] = shoes_df['Cost'].str.replace('$', '')
shoes_df['Repairs/Tailoring'] = shoes_df['Repairs/Tailoring'].str.replace('$', '')
shoes_df['Type'] = 'Shoes'

# View datafram
shoes_df.head()

Unnamed: 0,Wardrobe Tracking - Start Date 12/9/2018,Wears (11/2020),Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),...,Source,Notes,Repairs/Tailoring,CPW,Category,Today,Date Started,Subcategory - Sleeves,Subcategory - Length,Type
0,Black Leather Open-Toed Sandals,,1.0,,2,7,1,,,,...,,,,$1.38,sandals,11/11/2020,12/9/2018,,,Shoes
1,Brown Leather Trevi Flats,,,1.0,3,1,1,5.0,,3.0,...,Slow Fashion,,,$7.71,flats,11/11/2020,12/9/2018,,,Shoes
2,Black Leather Iris Loafers,,,1.0,3,2,2,2.0,6.0,6.0,...,Slow Fashion,,,$4.27,flats,11/11/2020,12/9/2018,,,Shoes
3,Blue Leather Sneakers,1.0,2.0,3.0,4,1,3,,1.0,1.0,...,Slow Fashion,,,$1.94,sneakers,11/11/2020,12/9/2018,,,Shoes
4,White Tennis Sneakers,1.0,1.0,1.0,3,5,1,12.0,3.0,9.0,...,Slow Fashion,,,$3.16,sneakers,11/11/2020,12/9/2018,,,Shoes


## Combine sheets into one DataFrame

In [50]:
# Combine dataframes to create master dataframe 
wardrobe_df = pd.concat([tops_df, bottoms_df, outerwear_df, shoes_df], ignore_index=True)

# Rename first column
wardrobe_df = wardrobe_df.rename(columns={'Wardrobe Tracking - Start Date 12/9/2018': 'Item', 'Acquired': 'Date Acquired'})

# Drop empty rows
wardrobe_df = wardrobe_df[wardrobe_df['Item'] != '']
    
# Replace STORAGE notes with NaN 
wardrobe_df = wardrobe_df.replace('STORAGE', np.nan)
wardrobe_df = wardrobe_df.replace('STORAGE ', np.nan)

# Clarify source of items that are not noted
wardrobe_df['Source'] = wardrobe_df['Source'].replace('', 'Fast Fashion')

# Remove unnecessary columns 
# Note: I removed November wears since that month is not complete
init_col_names = ['Wears/Month', 'CPW', 'Today', 'Start Date', 'Date Started', 'Wears (2018)', 'Wears (11/2020)', 'Total Wears']
for col_name in init_col_names:
    try:
        del wardrobe_df[col_name]
    except:
        pass
    
wardrobe_df.head()

Unnamed: 0,Item,Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),Wears (2/2020),...,Cost,Brand,Source,Notes,Repairs/Tailoring,Category,Subcategory - Sleeves,Color,Subcategory - Length,Type
0,Brian's White Buttonup LS Shirt,2,1.0,2.0,,,,,,,...,0,Brian,Secondhand,,,shirt,long sleeve,cream/white/tan,,Tops
1,Cream Turtleneck LS Shirt,1,1.0,,,,1.0,1.0,,1.0,...,4,Goodwill,Secondhand,,,shirt,long sleeve,cream/white/tan,,Tops
2,Tie Dye Mineo LS Shirt,1,,,,,,2.0,,2.0,...,35,Andy Mineo,Fast Fashion,Winter Jam,,shirt,long sleeve,multi,,Tops
3,Forest Green Silk LS Shirt,2,,,,,1.0,,,1.0,...,23,Vince/Poshmark,Secondhand,,,shirt,long sleeve,green,,Tops
4,Grey Heather LS Shirt,1,,1.0,1.0,,2.0,1.0,1.0,1.0,...,15,Old Navy,Fast Fashion,,,shirt,long sleeve,grey,,Tops


In [51]:
# Pull list of column names and check for correct concatenation
column_names = list(wardrobe_df.columns)
print(column_names)

['Item', 'Wears (10/2020)', 'Wears (9/2020)', 'Wears (8/2020)', 'Wears (7/2020)', 'Wears (6/2020)', 'Wears (5/2020)', 'Wears (4/2020)', 'Wears (3/2020)', 'Wears (2/2020)', 'Wears (1/2020)', 'Wears (12/2019)', 'Wears (11/2019)', 'Wears (10/2019)', 'Wears (9/2019)', 'Wears (8/2019)', 'Wears (7/2019)', 'Wears (6/2019)', 'Wears (5/2019)', 'Wears (4/2019)', 'Wears (3/2019)', 'Wears (2/2019)', 'Wears (1/2019)', 'Date Acquired', 'Cost', 'Brand', 'Source', 'Notes', 'Repairs/Tailoring', 'Category', 'Subcategory - Sleeves', 'Color', 'Subcategory - Length', 'Type']


In [52]:
# Store in CSV
wardrobe_df.to_csv('Resources/wardrobedatainitial.csv', index=False)

## Analyze data for total wears, cost per wear and wears per month

In [53]:
# Read in CSV
wardrobe_df = pd.read_csv('Resources/wardrobedatainitial.csv')
wardrobe_df.head()

Unnamed: 0,Item,Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),Wears (2/2020),...,Cost,Brand,Source,Notes,Repairs/Tailoring,Category,Subcategory - Sleeves,Color,Subcategory - Length,Type
0,Brian's White Buttonup LS Shirt,2.0,1.0,2.0,,,,,,,...,0,Brian,Secondhand,,,shirt,long sleeve,cream/white/tan,,Tops
1,Cream Turtleneck LS Shirt,1.0,1.0,,,,1.0,1.0,,1.0,...,4,Goodwill,Secondhand,,,shirt,long sleeve,cream/white/tan,,Tops
2,Tie Dye Mineo LS Shirt,1.0,,,,,,2.0,,2.0,...,35,Andy Mineo,Fast Fashion,Winter Jam,,shirt,long sleeve,multi,,Tops
3,Forest Green Silk LS Shirt,2.0,,,,,1.0,,,1.0,...,23,Vince/Poshmark,Secondhand,,,shirt,long sleeve,green,,Tops
4,Grey Heather LS Shirt,1.0,,1.0,1.0,,2.0,1.0,1.0,1.0,...,15,Old Navy,Fast Fashion,,,shirt,long sleeve,grey,,Tops


In [55]:
# Remove trailing whitespaces
columns = wardrobe_df.columns
for col in wardrobe_df.columns:
    if wardrobe_df[col].dtype == 'object':
        wardrobe_df[col] = wardrobe_df[col].str.rstrip()
    if wardrobe_df[col].dtype == 'int64' or wardrobe_df[col].dtype == 'float64':
        wardrobe_df[col] = wardrobe_df[col].replace(np.nan, 0)
        
wardrobe_df.head()

Unnamed: 0,Item,Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),Wears (2/2020),...,Cost,Brand,Source,Notes,Repairs/Tailoring,Category,Subcategory - Sleeves,Color,Subcategory - Length,Type
0,Brian's White Buttonup LS Shirt,2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,Brian,Secondhand,,0.0,shirt,long sleeve,cream/white/tan,,Tops
1,Cream Turtleneck LS Shirt,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,...,4,Goodwill,Secondhand,,0.0,shirt,long sleeve,cream/white/tan,,Tops
2,Tie Dye Mineo LS Shirt,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,...,35,Andy Mineo,Fast Fashion,Winter Jam,0.0,shirt,long sleeve,multi,,Tops
3,Forest Green Silk LS Shirt,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,23,Vince/Poshmark,Secondhand,,0.0,shirt,long sleeve,green,,Tops
4,Grey Heather LS Shirt,1.0,0.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,...,15,Old Navy,Fast Fashion,,0.0,shirt,long sleeve,grey,,Tops


In [56]:
# Determine columns that include wear counts
wear_cols = []

for col_name in column_names:
    if 'Wears' in col_name:
        wear_cols.append(col_name)
print(wear_cols)

['Wears (10/2020)', 'Wears (9/2020)', 'Wears (8/2020)', 'Wears (7/2020)', 'Wears (6/2020)', 'Wears (5/2020)', 'Wears (4/2020)', 'Wears (3/2020)', 'Wears (2/2020)', 'Wears (1/2020)', 'Wears (12/2019)', 'Wears (11/2019)', 'Wears (10/2019)', 'Wears (9/2019)', 'Wears (8/2019)', 'Wears (7/2019)', 'Wears (6/2019)', 'Wears (5/2019)', 'Wears (4/2019)', 'Wears (3/2019)', 'Wears (2/2019)', 'Wears (1/2019)']


In [57]:
# Calculate total wears
wardrobe_df['Total Wears'] = wardrobe_df[wear_cols].sum(axis=1)

wardrobe_df.head()

Unnamed: 0,Item,Wears (10/2020),Wears (9/2020),Wears (8/2020),Wears (7/2020),Wears (6/2020),Wears (5/2020),Wears (4/2020),Wears (3/2020),Wears (2/2020),...,Brand,Source,Notes,Repairs/Tailoring,Category,Subcategory - Sleeves,Color,Subcategory - Length,Type,Total Wears
0,Brian's White Buttonup LS Shirt,2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Brian,Secondhand,,0.0,shirt,long sleeve,cream/white/tan,,Tops,5.0
1,Cream Turtleneck LS Shirt,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,...,Goodwill,Secondhand,,0.0,shirt,long sleeve,cream/white/tan,,Tops,12.0
2,Tie Dye Mineo LS Shirt,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,...,Andy Mineo,Fast Fashion,Winter Jam,0.0,shirt,long sleeve,multi,,Tops,5.0
3,Forest Green Silk LS Shirt,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,Vince/Poshmark,Secondhand,,0.0,shirt,long sleeve,green,,Tops,10.0
4,Grey Heather LS Shirt,1.0,0.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,...,Old Navy,Fast Fashion,,0.0,shirt,long sleeve,grey,,Tops,23.0


In [75]:
# Calculate cost per wear
wardrobe_df['Repairs/Tailoring'] = wardrobe_df['Repairs/Tailoring'].replace(np.nan, 0)
wardrobe_df['Cost per Wear'] = 0
for index, row in wardrobe_df.iterrows():
    if row['Total Wears'] > 0:
        wardrobe_df.loc[index, 'Cost per Wear'] = (row['Cost'] + row['Repairs/Tailoring'])/row['Total Wears']

wardrobe_df.head()

Unnamed: 0,Item,Total Wears,Cost per Wear,Wears per Month,Date Acquired,Cost,Source,Repairs/Tailoring,Color,Type,...,Wears (10/2019),Wears (9/2019),Wears (8/2019),Wears (7/2019),Wears (6/2019),Wears (5/2019),Wears (4/2019),Wears (3/2019),Wears (2/2019),Wears (1/2019)
0,Brian's White Buttonup LS Shirt,5.0,0.0,1.59574,2020-08-09,0,Secondhand,0.0,cream/white/tan,Tops,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Cream Turtleneck LS Shirt,12.0,0.333333,0.519481,2018-12-19,4,Secondhand,0.0,cream/white/tan,Tops,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,Tie Dye Mineo LS Shirt,5.0,7.0,0.555556,2020-02-15,35,Fast Fashion,0.0,multi,Tops,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Forest Green Silk LS Shirt,10.0,2.3,0.769231,2019-10-18,23,Secondhand,0.0,green,Tops,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Grey Heather LS Shirt,23.0,0.652174,0.660287,2018-01-01,15,Fast Fashion,0.0,grey,Tops,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,3.0


In [76]:
# Calculate wears per month
wardrobe_df['Date Acquired'] = pd.to_datetime(wardrobe_df['Date Acquired'])
wardrobe_df['Wears per Month'] = ''
for index, row in wardrobe_df.iterrows():
    delta = dt.datetime.today() - row['Date Acquired']
    months = delta.days/30
    wardrobe_df.loc[index, 'Wears per Month'] = row['Total Wears']/months

wardrobe_df.head()

Unnamed: 0,Item,Total Wears,Cost per Wear,Wears per Month,Date Acquired,Cost,Source,Repairs/Tailoring,Color,Type,...,Wears (10/2019),Wears (9/2019),Wears (8/2019),Wears (7/2019),Wears (6/2019),Wears (5/2019),Wears (4/2019),Wears (3/2019),Wears (2/2019),Wears (1/2019)
0,Brian's White Buttonup LS Shirt,5.0,0.0,1.59574,2020-08-09,0,Secondhand,0.0,cream/white/tan,Tops,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Cream Turtleneck LS Shirt,12.0,0.333333,0.519481,2018-12-19,4,Secondhand,0.0,cream/white/tan,Tops,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,Tie Dye Mineo LS Shirt,5.0,7.0,0.555556,2020-02-15,35,Fast Fashion,0.0,multi,Tops,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Forest Green Silk LS Shirt,10.0,2.3,0.769231,2019-10-18,23,Secondhand,0.0,green,Tops,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Grey Heather LS Shirt,23.0,0.652174,0.660287,2018-01-01,15,Fast Fashion,0.0,grey,Tops,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,3.0


In [77]:
# Print list of column names
column_names_final = list(wardrobe_df.columns)
print(column_names_final)

['Item', 'Total Wears', 'Cost per Wear', 'Wears per Month', 'Date Acquired', 'Cost', 'Source', 'Repairs/Tailoring', 'Color', 'Type', 'Category', 'Subcategory - Sleeves', 'Subcategory - Length', 'Wears (10/2020)', 'Wears (9/2020)', 'Wears (8/2020)', 'Wears (7/2020)', 'Wears (6/2020)', 'Wears (5/2020)', 'Wears (4/2020)', 'Wears (3/2020)', 'Wears (2/2020)', 'Wears (1/2020)', 'Wears (12/2019)', 'Wears (11/2019)', 'Wears (10/2019)', 'Wears (9/2019)', 'Wears (8/2019)', 'Wears (7/2019)', 'Wears (6/2019)', 'Wears (5/2019)', 'Wears (4/2019)', 'Wears (3/2019)', 'Wears (2/2019)', 'Wears (1/2019)']


In [78]:
# Rearrange columns
wardrobe_df = wardrobe_df[['Item', 
                           'Total Wears', 
                           'Cost per Wear', 
                           'Wears per Month', 
                           'Date Acquired', 
                           'Cost', 
                           'Source', 
                           'Repairs/Tailoring', 
                           'Color', 
                           'Type', 
                           'Category', 
                           'Subcategory - Sleeves', 
                           'Subcategory - Length',
                           'Wears (10/2020)', 
                           'Wears (9/2020)', 
                           'Wears (8/2020)', 
                           'Wears (7/2020)', 
                           'Wears (6/2020)', 
                           'Wears (5/2020)', 
                           'Wears (4/2020)', 
                           'Wears (3/2020)', 
                           'Wears (2/2020)', 
                           'Wears (1/2020)', 
                           'Wears (12/2019)', 
                           'Wears (11/2019)', 
                           'Wears (10/2019)', 
                           'Wears (9/2019)', 
                           'Wears (8/2019)', 
                           'Wears (7/2019)', 
                           'Wears (6/2019)', 
                           'Wears (5/2019)', 
                           'Wears (4/2019)', 
                           'Wears (3/2019)', 
                           'Wears (2/2019)', 
                           'Wears (1/2019)']]

wardrobe_df.head()

Unnamed: 0,Item,Total Wears,Cost per Wear,Wears per Month,Date Acquired,Cost,Source,Repairs/Tailoring,Color,Type,...,Wears (10/2019),Wears (9/2019),Wears (8/2019),Wears (7/2019),Wears (6/2019),Wears (5/2019),Wears (4/2019),Wears (3/2019),Wears (2/2019),Wears (1/2019)
0,Brian's White Buttonup LS Shirt,5.0,0.0,1.59574,2020-08-09,0,Secondhand,0.0,cream/white/tan,Tops,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Cream Turtleneck LS Shirt,12.0,0.333333,0.519481,2018-12-19,4,Secondhand,0.0,cream/white/tan,Tops,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,Tie Dye Mineo LS Shirt,5.0,7.0,0.555556,2020-02-15,35,Fast Fashion,0.0,multi,Tops,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Forest Green Silk LS Shirt,10.0,2.3,0.769231,2019-10-18,23,Secondhand,0.0,green,Tops,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Grey Heather LS Shirt,23.0,0.652174,0.660287,2018-01-01,15,Fast Fashion,0.0,grey,Tops,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,3.0


In [79]:
# Export to CSV
wardrobe_df.to_csv('Resources/wardrobedata.csv', index=False)