# **Clothero Comparison and CSV conversions**

## Connect drive

In [None]:
# Mount Google Drive at /content/drive (google.colab is only available inside Colab).
from google.colab import drive
drive.mount('/content/drive')

 ## Import libraries

In [None]:
import base64
import io
import math
import os
import urllib.request
from math import sqrt

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tqdm
from PIL import Image

## Download Pretrained model

In [None]:
# Feature extractor: MobileNetV2 with ImageNet weights and no classification
# head (include_top=False), taking 224x224 RGB inputs.
embed = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

## Tensorflow compare class

In [None]:
class TensorVector():
    """Embed images with the module-level ``embed`` model and compare them.

    ``process`` turns an image file into a flat feature vector; the other
    methods are similarity measures over two numeric sequences (lists or
    NumPy arrays).
    """

    def __init__(self):
        pass

    def process(self, FileName):
        """Return the flattened ``embed`` features for one JPEG file.

        Args:
            FileName: Path of the JPEG image to read.

        Returns:
            1-D NumPy array with 7*7*1280 elements.
        """
        img = tf.io.read_file(FileName)
        img = tf.io.decode_jpeg(img, channels=3)
        img = tf.image.resize_with_pad(img, 224, 224)
        # NOTE(review): resize_with_pad presumably already yields float32
        # pixels in the 0-255 range, in which case this conversion does not
        # rescale them, whereas MobileNetV2 is normally fed the output of
        # mobilenet_v2.preprocess_input. Left as-is because changing it would
        # alter every vector this method produces -- confirm before touching.
        img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
        features = embed.predict(img)
        resize_feature = np.reshape(features, (7*7*1280))
        return resize_feature

    def cosineSim(self, a1, a2):
        """Return the cosine similarity of two numeric sequences.

        The sums are computed by NumPy in float64 instead of an
        element-by-element Python loop, which is much faster for the
        62,720-element vectors returned by ``process`` and avoids
        accumulating in float32.

        If the sequences differ in length, the trailing elements of the
        longer one are ignored (the behaviour of the previous ``zip`` loop).

        Args:
            a1: First vector.
            a2: Second vector.

        Returns:
            The similarity as a Python float.

        Raises:
            ZeroDivisionError: If either vector has zero norm (this includes
                empty input).
        """
        v1 = np.asarray(a1, dtype=np.float64)
        v2 = np.asarray(a2, dtype=np.float64)
        common = min(v1.size, v2.size)
        v1, v2 = v1[:common], v2[:common]

        dot = float(np.dot(v1, v2))
        norm1 = sqrt(float(np.dot(v1, v1)))
        norm2 = sqrt(float(np.dot(v2, v2)))
        # Plain Python floats, so a zero norm raises instead of yielding NaN.
        return dot / (norm1 * norm2)

    def jaccard_similarity(self, list1, list2):
        """Return shared distinct items divided by combined length.

        The denominator uses ``len(list1) + len(list2)``, so repeated items
        are counted there; the result equals the set-based Jaccard index only
        when neither input contains duplicates.

        Raises:
            ZeroDivisionError: If both inputs are empty.
        """
        intersection = len(set(list1).intersection(list2))
        union = (len(list1) + len(list2)) - intersection
        return float(intersection) / union

    def average(self, x):
        """Return the arithmetic mean of ``x`` as a float.

        Raises:
            ValueError: If ``x`` is empty.
        """
        if len(x) == 0:
            raise ValueError("average() requires at least one value")
        return float(sum(x)) / len(x)

    def pearson_def(self, x, y):
        """Return the Pearson correlation coefficient of ``x`` and ``y``.

        Computed with NumPy in float64 rather than an index loop.

        Args:
            x: First sequence of numbers.
            y: Second sequence of numbers, same length as ``x``.

        Returns:
            The coefficient as a Python float.

        Raises:
            ValueError: If the lengths differ or the inputs are empty.
            ZeroDivisionError: If either input has zero variance.
        """
        if len(x) != len(y):
            raise ValueError("pearson_def() requires sequences of equal length")
        if len(x) == 0:
            raise ValueError("pearson_def() requires at least one value")

        xs = np.asarray(x, dtype=np.float64)
        ys = np.asarray(y, dtype=np.float64)
        x_dev = xs - xs.mean()
        y_dev = ys - ys.mean()

        diffprod = float(np.dot(x_dev, y_dev))
        xdiff2 = float(np.dot(x_dev, x_dev))
        ydiff2 = float(np.dot(y_dev, y_dev))
        return diffprod / math.sqrt(xdiff2 * ydiff2)

    def mean(self, vector1, vector2):
        """Return the average of the cosine similarity and Pearson correlation."""
        cs = self.cosineSim(vector1, vector2)
        ps = self.pearson_def(vector1, vector2)
        return (cs + ps) / 2

    def compare(self, images):
        """Return the ``mean`` similarity of the two image files in ``images``.

        Args:
            images: Sequence whose first two items are image file paths.
        """
        vector1 = self.process(images[0])
        vector2 = self.process(images[1])
        return self.mean(vector1, vector2)


## convert base64


In [None]:

def convertBase64(FileName):
    """Return the raw bytes of the file at ``FileName``.

    Earlier versions base64-encoded the bytes and immediately decoded them
    again before returning, which yields exactly the bytes that were read;
    that round trip has been removed. No image decoding is performed and no
    NumPy array is built.

    Args:
        FileName: Path of the file to read.

    Returns:
        The file contents as ``bytes``.
    """
    with open(FileName, "rb") as f:
        return f.read()


## Upload the scraped CSV files

In [None]:
# Open Colab's file-upload prompt; presumably used to upload the CSV files
# read in the next cell -- confirm.
from google.colab import files
files.upload()

## Create Class object and convert to csv database

In [None]:
# Feature extractor used by the following cells. It must be named `cls_obj`:
# that is the name those cells call `.process()` on (it was previously bound
# to `ls_obj`, which left `cls_obj` undefined).
cls_obj = TensorVector()

# Column names applied to both scraped CSVs; each file is expected to have
# exactly these six columns, in this order.
col = ['Name', 'Product', 'Price', 'Link', 'Image', 'Type']

# Only the first 2500 rows of each file are used.
db1 = pd.read_csv("amazon_data_jeans.csv")[:2500]
db1.columns = col
db2 = pd.read_csv("amazon_data_tshirt.csv")[:2500]
db2.columns = col
print(db1.head())

# Shape (n, 1) arrays: each row holds one image URL at index 0.
img_url1 = db1[['Image']].values
img_url2 = db2[['Image']].values

# Placeholder column, filled with feature vectors by the next cell.
db1['Vector'] = ""

# Scratch file every downloaded image is written to before being embedded.
img_name = 'sample.jpg'


## Iterate over Jeans and T-shirt images

In [None]:
def _embed_image_urls(url_rows, extractor, tmp_path):
    """Download each image and return one feature list (or None) per row.

    Args:
        url_rows: Sequence of rows whose first element is an image URL.
        extractor: Object with a ``process(path)`` method returning an array.
        tmp_path: Local file each image is downloaded to (overwritten per row).

    Returns:
        A list with one entry per row: the feature vector as a Python list,
        or ``None`` when the download or the embedding failed.
    """
    vectors = []
    for url_row in tqdm.tqdm(url_rows):
        try:
            urllib.request.urlretrieve(url_row[0], tmp_path)
            vectors.append(extractor.process(tmp_path).tolist())
        except Exception as exc:
            # Best effort: one bad URL or image must not abort the whole run.
            # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt
            # working during the long download loop.
            vectors.append(None)
            print(exc)
    return vectors


img_name = 'sample.jpg'

# Assign each column in one step. The previous per-row
# `db['Vector'].iloc[i] = ...` is chained assignment, which pandas does not
# guarantee to write through to the DataFrame.
db1['Vector'] = pd.Series(_embed_image_urls(img_url1, cls_obj, img_name),
                          index=db1.index, dtype=object)
db2['Vector'] = pd.Series(_embed_image_urls(img_url2, cls_obj, img_name),
                          index=db2.index, dtype=object)


## Combine

In [None]:
# Stack the jeans and T-shirt rows and renumber the index 0..n-1.
final_db = pd.concat([db1, db2], ignore_index=True)
final_db.columns = ['Name', 'Product',
                    'Price', 'Link', 'Image', 'Type', 'Vector']
# NOTE(review): this cell used to call `final_db.drop(['Name'], axis=1)`
# without keeping the result, so it had no effect and 'Name' has always been
# written to Data.csv. The dead call is removed and the column is kept so the
# file's schema does not change; if 'Name' really should be excluded, use
# `final_db = final_db.drop(columns=['Name'])` here.
final_db.to_csv("Data.csv", index=False)

# **Build the database of show data**

In [None]:
# Image URLs that the next cell downloads and labels "Tshirt".
t_img_url = ["https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/12029466/2020/7/29/059e98dd-604b-4dc0-9836-534d139248221596032265588AnoukWomenNavyBlueSolidStraightKurtaTshirtsAmericanCrewMenTs2.jpg",

             "https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/5508761/2018/4/30/11525068766496-HERENOW-Men-Teal-Printed-Round-Neck-T-shirt-2021525068766307-1.jpg",
             "https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/11548544/2020/3/9/5eeaead2-de8e-45fb-99d9-c5bccf21e7ea1583748694212-Huetrap-Men-Tshirts-6441583748692050-1.jpg",
             "https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/1964364/2017/6/23/11498197904638-HERENOW-Men-Navy-Blue-Solid-Henley-Neck-T-shirt-6741498197904414-1.jpg",
             "https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/1997302/2017/8/23/11503492424739-Roadster-Men-Red-Printed-Round-Neck-T-shirt-4971503492424397-1.jpg",
             "https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/11748120/2020/7/7/48d5564c-8166-4981-a50a-4414e59e49bc1594111528301ShangrilaCreationGold-TonedPinkSilkBlendWovenDesignPaithaniS1.jpg",

             "https://assets.ajio.com/medias/sys_master/root/h53/h74/15216776806430/-473Wx593H-461085141-blue-MODEL3.jpg",
             "https://assets.ajio.com/medias/sys_master/root/h22/hc3/16010300588062/-473Wx593H-461005997-multi-MODEL.jpg",
             "https://assets.ajio.com/medias/sys_master/root/ajio/catalog/5efa2b5af997dd433b463c86/-473Wx593H-461209675-navy-MODEL.jpg",
             "https://assets.ajio.com/medias/sys_master/root/ajio/catalog/5f0cab777cdb8c721b7bf2dd/-473Wx593H-460545545-green-MODEL3.jpg",
             "https://assets.ajio.com/medias/sys_master/root/h9f/hd0/16053450932254/-473Wx593H-461134695-yellow-MODEL4.jpg",
             ]
# Image URLs that the next cell downloads and labels "Jeans".
j_img_url = ["https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/10476518/2019/8/22/1ff738e2-aeb0-41e9-b81e-4046dc02d6221566465605679-LOCOMOTIVE-Men-Navy-Blue-Tapered-Fit-Jeans-8201566465604503-1.jpg",
             "https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/4451364/2019/11/25/8d57e2fc-4c1b-4f01-b129-40d3412eee5f1574687095197-Roadster-Fast-and-Furious-Men-Blue-Slim-Fit-Mid-Rise-Low-Dis-1.jpg",
             "https://assets.myntassets.com/h_1440,q_90,w_1080/v1/assets/images/productimage/2020/7/7/9f7e7546-925e-4c07-b9de-1ec252b7cf7b1594100227721-1.jpg",
             "https://assets.ajio.com/medias/sys_master/root/hed/h2f/15258040795166/-473Wx593H-460398814-blue-MODEL4.jpg",
             "https://assets.ajio.com/medias/sys_master/root/h86/h9d/16764497756190/-473Wx593H-460596707-blue-MODEL6.jpg",
             "https://assets.ajio.com/medias/sys_master/root/h34/h27/14914075230238/-473Wx593H-460441264-grey-MODEL4.jpg",
             # NOTE(review): the next URL is identical to the one two entries
             # above -- confirm whether the repeat is intended.
             "https://assets.ajio.com/medias/sys_master/root/h86/h9d/16764497756190/-473Wx593H-460596707-blue-MODEL6.jpg",
             "https://assets.ajio.com/medias/sys_master/root/ajio/catalog/5f03a4d9f997dd433b497e99/-473Wx593H-461214287-blue-MODEL5.jpg", ]
# Folder the downloaded images are saved into; no error if it already exists.
os.makedirs("Data",exist_ok=True)

## Download and iterate for preprocessing

In [None]:
# Build one [label, source URL, feature-vector list] row per sample image,
# T-shirts first and then jeans. Each image is saved as Data/<label>-<n>.jpg
# before it is embedded.
show_data = []
for label, urls in (("Tshirt", t_img_url), ("Jeans", j_img_url)):
    for position, url in enumerate(urls):
        local_path = f"Data/{label}-{position}.jpg"
        urllib.request.urlretrieve(url, local_path)
        features = cls_obj.process(local_path)
        show_data.append([label, url, features.tolist()])