# Majority Voting

In [None]:
import pandas as pd
import json

# Read in the csv file of results
results_df = pd.read_csv("Glam_Guru_Batch_Results.csv")

###############################################################################################
############################COUNTING VOTES PART################################################
###############################################################################################
# The four answer options a worker can choose for an image.
LABEL_OPTIONS = (
  "Casual and fashionable",
  "Casual, but not fashionable",
  "Formal and fashionable",
  "Formal, but not fashionable",
)

votes = {}          # Dictionary mapping image url to a dictionary that maps label to its count of votes
uni = set()         # Unique worker ids seen in the results
skipped_votes = 0   # Answers that were missing or not one of LABEL_OPTIONS

# Iterate through the rows and tally the votes
for index, row in results_df.iterrows():
  # Convert the answer field to a dictionary for access later.
  # The first and last characters are stripped so that json.loads accepts the string
  # (presumably an outer pair of brackets around the JSON object -- verify against the CSV).
  answers = json.loads(row["answers"][1:-1])
  worker = row["workerid"]
  print(worker)
  uni.add(worker)

  # Each row carries twelve images in the columns image1 ... image12
  for num in range(1, 13):
    image_num = f"image{num}"
    url = row[image_num]

    # answers.get returns None when the worker gave no answer for this image; indexing the
    # tally with None (or any unexpected label) used to abort the whole run with a KeyError.
    ans = answers.get(image_num)
    if ans not in LABEL_OPTIONS:
      skipped_votes += 1
      continue

    # Initialize an all-zero tally for this url if it doesn't already exist, then count the vote
    votes.setdefault(url, dict.fromkeys(LABEL_OPTIONS, 0))[ans] += 1

print(len(uni))
if skipped_votes:
  print(f"Skipped {skipped_votes} missing or unrecognised answers")

###############################################################################################
############################MAJORITY VOTE PART#################################################
###############################################################################################

# Labels listed in tie-break order: when several labels share the highest vote count,
# the one appearing earliest in this tuple wins (max returns the first maximal item).
tie_break_order = (
  "Formal, but not fashionable",
  "Formal and fashionable",
  "Casual, but not fashionable",
  "Casual and fashionable",
)

# Dictionary mapping image url to its majority vote label
labels = {
  image_url: max(tie_break_order, key=tally.get)
  for image_url, tally in votes.items()
}

# Number of images that ended up with each label
winning_labels = list(labels.values())
fornotfas = winning_labels.count("Formal, but not fashionable")
casnotfas = winning_labels.count("Casual, but not fashionable")
forandfas = winning_labels.count("Formal and fashionable")
casandfas = winning_labels.count("Casual and fashionable")

## ML SECTION

In [None]:
# Install/upgrade fastbook quietly. The leading "!" is an IPython shell escape, so this
# line only works inside a notebook cell, not in a plain Python script.
!pip install -Uqq fastbook
import pandas as pd 
import fastbook
# NOTE(review): the recorded cell output shows "Mounted at /content/gdrive" in addition to
# the explicit mount below -- presumably produced by setup_book(); confirm.
fastbook.setup_book()
import sklearn
from fastbook import *
from fastai.vision.all import *
from google.colab import drive
# Mount Google Drive at /content/drive; get_path_from_url maps image URLs to files
# under /content/drive/MyDrive/.
drive.mount('/content/drive')


[K     |████████████████████████████████| 727kB 9.0MB/s 
[K     |████████████████████████████████| 1.2MB 11.4MB/s 
[K     |████████████████████████████████| 204kB 42.7MB/s 
[K     |████████████████████████████████| 51kB 6.8MB/s 
[K     |████████████████████████████████| 61kB 7.7MB/s 
[K     |████████████████████████████████| 51kB 6.6MB/s 
[?25hMounted at /content/gdrive
Mounted at /content/drive


In [None]:
def get_path_from_url(
  url,
  *,
  remote_prefix='https://s3.amazonaws.com/nets213glamguru/',
  local_root='/content/drive/MyDrive/images_compressed/',
):
  """Translate a remote image URL into the path of its local copy.

  Args:
    url: Remote URL of the image.
    remote_prefix: URL prefix to substitute. Defaults to the project's S3 bucket.
    local_root: Directory string that replaces ``remote_prefix``. Defaults to the
      compressed-images folder on the mounted Google Drive.

  Returns:
    ``url`` with every occurrence of ``remote_prefix`` replaced by ``local_root``.
    A URL that does not contain ``remote_prefix`` is returned unchanged.
  """
  return url.replace(remote_prefix, local_root)

# Remote URLs of every labelled image, in the iteration order of `labels`
image_urls = [*labels]
# Majority-vote label of each image, index-aligned with image_urls
all_labels = [labels[remote_url] for remote_url in image_urls]
# Local path of each image file in the Google Colab files, index-aligned with image_urls
paths = list(map(get_path_from_url, image_urls))

In [None]:
# #FIND CORRUPTED IMAGE FILES
# keep = []                                               #list holding indices that we want to keep
# for i in range(len(paths)):
#   print(i)
#   if verify_image(paths[i]):
#     keep.append(i)

# paths_cleaned = []                                      #Holds the valid paths
# labels_cleaned = []                                     #Holds the valid labels 

# for i in keep:
#   paths_cleaned.append(paths[i])                        #get rid of the image in paths
#   labels_cleaned.append(all_labels[i])                  #get rid of the label for this image as well

# Indices (into paths / all_labels) of the corrupted image files, hard-coded so the slow
# verify_image scan above does not have to be re-run.
# NOTE(review): these positions depend on the order in which `labels` was built, i.e. on
# the row order of the results CSV -- re-run the scan if that file changes.
remove = [1506, 167, 1839, 144, 2582, 5757, 5086]
corrupted = set(remove)                                 # set gives O(1) membership tests

# Indices of the images we keep
keep = [i for i in range(len(paths)) if i not in corrupted]

paths_cleaned = [paths[i] for i in keep]                # Holds the valid paths
labels_cleaned = [all_labels[i] for i in keep]          # Holds the labels of the valid paths

# Import the submodule explicitly: a bare `import sklearn` does not guarantee that
# sklearn.model_selection has been loaded.
from sklearn.model_selection import train_test_split

# Hold out 20% of the images as a test set, stratified by label. random_state pins the
# shuffle so the same test set is produced on every run (42 matches the seed used for
# the validation split when the DataLoaders are built).
paths_train, paths_test, labels_train, labels_test = train_test_split(
  paths_cleaned,
  labels_cleaned,
  test_size=0.2,
  stratify=labels_cleaned,
  random_state=42,
)

In [None]:
# Build the fastai DataLoaders from the training paths and labels. valid_pct/seed carve a
# seeded 20% validation split out of the training data; every item is resized to 224.
dls = ImageDataLoaders.from_lists(
  "",
  paths_train,
  labels_train,
  valid_pct=0.2,
  seed=42,
  item_tfms=Resize(224),
)

# Start from a pretrained ResNet-34 and report the error rate on the validation split.
classifier = cnn_learner(
  dls,
  models.resnet34,
  pretrained=True,
  metrics=error_rate,
)
classifier.fine_tune(4)

epoch,train_loss,valid_loss,error_rate,time
0,2.094778,1.34167,0.53913,15:10


epoch,train_loss,valid_loss,error_rate,time
0,1.444051,1.0688,0.415217,20:13


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Fixed class order shared by the matrix rows/columns and the heatmap tick labels
class_names = [
  "Casual and fashionable",
  "Casual, but not fashionable",
  "Formal and fashionable",
  "Formal, but not fashionable",
]

# Predict the held-out test images one at a time; element 0 of predict's result is the label
predictions = [classifier.predict(path)[0] for path in paths_test]

# Rows are the true labels, columns are the predicted labels
cf_matrix = confusion_matrix(labels_test, predictions, labels=class_names)

# fmt="d" prints the integer counts in full; the default format rounds large counts to two
# significant digits in scientific notation. Tick labels name the class of each row/column.
sns.heatmap(
  cf_matrix,
  annot=True,
  fmt="d",
  xticklabels=class_names,
  yticklabels=class_names,
)



array([[1151,    0,    0,    0],
       [   0,    0,    0,    0],
       [   0,    0,    0,    0],
       [   0,    0,    0,    0]])

In [None]:
# Export the trained learner to a .pkl file so it can be loaded later without retraining.
# NOTE(review): the file name is spelled "fashionclassifer" (no "i" before "er"); whatever
# loads the model must use the same spelling.
classifier.export("fashionclassifer.pkl")

2
