# Reading Scraped Files

In [None]:
import pandas as pd
# Load the four scraped listings. The two Amazon files are parsed with
# thousands=',' (comma treated as a thousands separator); the two Myntra
# files use the reader's defaults.
df_amazon_women, df_amazon_men = (
    pd.read_csv(csv_path, thousands = ',')
    for csv_path in ("scraped_data/Amazon_women.csv", "scraped_data/Amazon_Men.csv")
)
df_myntra_men, df_myntra_women = (
    pd.read_csv(csv_path)
    for csv_path in ("scraped_data/myntra_men_latest.csv", "scraped_data/myntra_women_latest.csv")
)



# Pre-Processing

In [None]:
# Drop every Myntra row that contains a missing value, then rename the scraped
# columns to the Links / Rating / Reviews names that the merge step selects.
myntra_column_names = {'image': 'Links', 'rating': 'Rating', 'reviews' : 'Reviews'}

df_myntra_men = df_myntra_men.dropna().rename(columns = myntra_column_names)
df_myntra_women = df_myntra_women.dropna().rename(columns = myntra_column_names)

In [None]:
# Keep only the first whitespace-separated token of each review string.
df_myntra_men["Reviews"] = df_myntra_men["Reviews"].str.split(expand=True)[0]

# Numeric part: strip a trailing run of K/k and parse what is left as float.
men_review_base = df_myntra_men["Reviews"].replace(r'[Kk]+$', '', regex=True).astype(float)

# Multiplier: 1000 where the token is digits/dots followed by K or k, else 1.
men_review_scale = (
    df_myntra_men["Reviews"]
    .str.extract(r'[\d\.]+([Kk]+)', expand=False)
    .fillna(1)
    .replace(['K','k'], [10**3, 10**3])
    .astype(int)
)

df_myntra_men["Reviews"] = men_review_base * men_review_scale

In [None]:
# Keep only the first whitespace-separated token of each review string.
df_myntra_women["Reviews"] = df_myntra_women["Reviews"].str.split(expand=True)[0]

# Numeric part: strip a trailing run of K/k and parse what is left as float.
women_review_base = df_myntra_women["Reviews"].replace(r'[Kk]+$', '', regex=True).astype(float)

# Multiplier: 1000 where the token is digits/dots followed by K or k, else 1.
women_review_scale = (
    df_myntra_women["Reviews"]
    .str.extract(r'[\d\.]+([Kk]+)', expand=False)
    .fillna(1)
    .replace(['K','k'], [10**3, 10**3])
    .astype(int)
)

df_myntra_women["Reviews"] = women_review_base * women_review_scale

In [None]:
# Clean the column at position 3 of both Amazon frames: keep the text before
# the first space and remove comma separators.
#
# The previous per-row form `df.iloc[i][3] = ...` assigned into a temporary row
# object (chained assignment), so the change was not guaranteed to reach the
# DataFrame, and a bare `except: pass` hid any failure. Assigning the whole
# column back makes the update reliable.
# NOTE(review): position 3 is presumably the "Reviews" column cast below — confirm.
for amazon_frame in (df_amazon_men, df_amazon_women):
    count_column = amazon_frame.columns[3]
    amazon_frame[count_column] = amazon_frame[count_column].map(
        # Non-string cells (already numeric, or missing) are left untouched,
        # which keeps the best-effort behaviour the try/except was aiming for.
        lambda cell: cell.split(" ")[0].replace(",", "") if isinstance(cell, str) else cell
    )

# Cast the review counts to 32-bit integers; this raises if any value is not
# convertible (for example a missing value).
df_amazon_men["Reviews"] = df_amazon_men["Reviews"].astype('int32')
df_amazon_women["Reviews"] = df_amazon_women["Reviews"].astype('int32')

In [None]:
# Load the two remaining listings, parsing commas as thousands separators.
df_amazon = pd.read_csv("scraped_data/df_amazon.csv", thousands = ',')
df_flipkart = pd.read_csv("scraped_data/df_flipkart.csv", thousands = ',')

# Drop the "Unnamed: 0" column from both frames.
df_amazon = df_amazon.drop(columns=["Unnamed: 0"])
df_flipkart = df_flipkart.drop(columns=["Unnamed: 0"])

# Clean the columns at positions 1 and 2 (after the drop above):
#   position 1 -> text before the first space
#   position 2 -> text before the first space, with commas removed
#
# The previous per-row form `df.iloc[i][k] = ...` assigned into a temporary row
# object (chained assignment), so the change was not guaranteed to reach the
# DataFrame, and bare `except: pass` blocks hid any failure. Whole-column
# assignment makes the update reliable.
# NOTE(review): positions 1 and 2 are presumably "rating" and "no_of_reviews",
# which a later cell casts to float32 / int32 — confirm against the CSV layout.
for listing_frame in (df_amazon, df_flipkart):
    second_column, third_column = listing_frame.columns[1], listing_frame.columns[2]
    # Non-string cells (already numeric, or missing) are left untouched.
    listing_frame[second_column] = listing_frame[second_column].map(
        lambda cell: cell.split(" ")[0] if isinstance(cell, str) else cell
    )
    listing_frame[third_column] = listing_frame[third_column].map(
        lambda cell: cell.split(" ")[0].replace(",", "") if isinstance(cell, str) else cell
    )

In [None]:
# Cast the rating column to float32 and the review-count column to int32 in
# both frames, one column at a time.
target_dtypes = {"rating": 'float32', "no_of_reviews": 'int32'}

for listing_frame in (df_amazon, df_flipkart):
    for column_name, column_dtype in target_dtypes.items():
        listing_frame[column_name] = listing_frame[column_name].astype(column_dtype)

In [None]:
# Rename the scraped column names, in place, to the Links / Rating / Reviews
# names that the merge step selects.
listing_column_names = {'img_links': 'Links', 'rating': 'Rating', 'no_of_reviews' : 'Reviews'}

for listing_frame in (df_amazon, df_flipkart):
    listing_frame.rename(columns = listing_column_names, inplace = True)

# Merging All the PreProcessed Data Into One File

In [None]:
# Stack the six per-source frames vertically and renumber the rows from 0.
source_frames = [
    df_amazon,
    df_flipkart,
    df_amazon_men,
    df_amazon_women,
    df_myntra_men,
    df_myntra_women,
]
df_merged = pd.concat(source_frames, axis=0, ignore_index=True)

In [None]:
# Keep only the three columns that the rest of the notebook reads.
kept_columns = ["Links", "Rating", "Reviews"]
df_merged = df_merged[kept_columns]

# Calculating Popularity Metric For Trend Prediction

In [None]:
def pop_met(n, s):
    """Return the popularity score ``s * (15 + n) / (n + 5 * s)``.

    ``n`` and ``s`` may be plain numbers or objects that support element-wise
    arithmetic; the notebook calls this with the ``Reviews`` column as ``n``
    and the ``Rating`` column as ``s``. The ``* 1.0`` factors force float
    arithmetic on both sides of the division.

    With plain Python numbers, ``n == 0`` and ``s == 0`` together raise
    ``ZeroDivisionError``.
    """
    return (s * (15 + n) * 1.0) / (n + 5 * s * 1.0)

# Score every merged row: review counts go in as `n`, ratings as `s`.
review_counts = df_merged["Reviews"]
ratings = df_merged["Rating"]
df_merged["popularity"] = pop_met(review_counts, ratings)

In [None]:
import matplotlib.pyplot as plt

# Line plot of the popularity column; the x-axis is the merged frame's row index.
plt.plot(df_merged["popularity"])

# Classifying Images Based On Popularity Values Into Various Classes (Multi-Class Classification)

In [None]:
import os
import urllib.request

# Download every product image into dataset/<class>/c<row>.jpg, where the class
# folder is chosen from the row's popularity score:
#   pop < 3          -> "0"
#   3   <= pop < 3.5 -> "1"
#   3.5 <= pop < 4   -> "2"
#   pop >= 4         -> "3"

# urlretrieve does not create directories, so make sure the class folders exist.
for class_label in ("0", "1", "2", "3"):
    os.makedirs(os.path.join("dataset", class_label), exist_ok=True)

# The merged frame was built with ignore_index=True, so the position `i` used in
# the file name is the same value the row label would give.
for i, (url, pop) in enumerate(zip(df_merged["Links"], df_merged["popularity"])):
    # A NaN score fails every `<` comparison and would otherwise fall through to
    # the top class; rows without a usable score or link are skipped instead.
    if pd.isna(pop) or not isinstance(url, str):
        print(f"skipping row {i}: missing popularity or link")
        continue

    if pop < 3:
        class_label = "0"
    elif pop < 3.5:
        class_label = "1"
    elif pop < 4:
        class_label = "2"
    else:
        class_label = "3"

    target_path = "dataset/" + class_label + "/c" + str(i) + ".jpg"
    try:
        urllib.request.urlretrieve(url, target_path)
    except (OSError, ValueError) as err:
        # URLError/HTTPError are OSError subclasses; ValueError covers malformed
        # URLs. One bad link should not abort the whole download run.
        print(f"skipping row {i}: could not download {url!r} ({err})")

In [None]:
# Preview the first rows of the merged frame.
df_merged.head()

# Training On Various Layers Using The TensorFlow Library

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

BATCH_SIZE = 32
IMG_SIZE = (160, 160)

data_dir = "dataset"

# Both calls share every option except `subset`: a 20% validation split,
# shuffling with seed 123, and the image/batch sizes defined above.
shared_split_options = dict(
    validation_split=0.2,
    shuffle=True,
    seed=123,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="training", **shared_split_options)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="validation", **shared_split_options)

In [None]:
import matplotlib.pyplot as plt

class_names = train_ds.class_names

# Draw the first nine images of one training batch in a 3x3 grid, each titled
# with the class name looked up from its label, with the axes hidden.
plt.figure(figsize=(10, 10))
for batch_images, batch_labels in train_ds.take(1):
    for slot in range(9):
        plt.subplot(3, 3, slot + 1)
        plt.imshow(batch_images[slot].numpy().astype("uint8"))
        plt.title(class_names[batch_labels[slot]])
        plt.axis("off")

# Image Refinement: Adjusting Pixels etc.

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Wrap both datasets with prefetch, leaving the buffer size to tf.data's
# AUTOTUNE setting.
train_ds, val_ds = (
    dataset.prefetch(buffer_size=AUTOTUNE) for dataset in (train_ds, val_ds)
)

In [None]:
# Input preprocessing applied inside the model, before the VGG16 base.
# The ImageNet VGG16 weights expect `vgg16.preprocess_input` (RGB -> BGR plus
# per-channel ImageNet mean subtraction, no scaling) rather than a rescale to
# [-1, 1], so the matching function is used here. The name `rescale` is kept
# because the model-building cell calls `rescale(inputs)`.
rescale = tf.keras.applications.vgg16.preprocess_input

# Model For Image Classification

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16

# Model input shape: the image size with a trailing dimension of 3 appended.
IMG_SHAPE = (*IMG_SIZE, 3)

# VGG16 with ImageNet weights and without its top (classifier) layers.
model = VGG16(input_shape=IMG_SHAPE, include_top=False, weights="imagenet")

In [None]:
# Pull a single batch from the training dataset and pass it through `model`
# (the VGG16 base at this point in a top-to-bottom run) to inspect the shape
# of the features it produces.
image_batch, label_batch = next(iter(train_ds))
feature_batch = model(image_batch)
print(feature_batch.shape)

In [None]:
# Freeze the base model so its weights are not updated during training.
model.trainable = False

In [None]:
# Global average pooling layer, tried on the sample feature batch to show the
# pooled shape. The same layer object is reused when the full model is built.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

In [None]:
# Dense output layer with 4 units and no activation, i.e. it produces raw
# logits; 4 matches the class folders dataset/0 .. dataset/3 written earlier.
# Tried on the pooled sample batch to show the output shape.
prediction_layer = tf.keras.layers.Dense(4)
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

# Passing Inputs For Various Layers

In [None]:
# Assemble the full classifier with the functional API:
#   image input -> rescale -> base model (called with training=False)
#   -> global average pooling -> dropout(0.2) -> dense prediction layer.
# NOTE: this cell reads `model` as the base network and then rebinds `model` to
# the assembled network, so running it a second time would use the previously
# assembled network as the base.
inputs = tf.keras.Input(shape=(160, 160, 3))
preprocessed = rescale(inputs)
base_features = model(preprocessed, training=False)
pooled_features = global_average_layer(base_features)
dropped_features = tf.keras.layers.Dropout(0.2)(pooled_features)
outputs = prediction_layer(dropped_features)
model = tf.keras.Model(inputs, outputs)

In [None]:
# Compile with Adam at a small fixed learning rate and sparse categorical
# cross-entropy computed on raw logits (the Dense prediction layer has no
# activation), tracking accuracy.
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and is
# rejected by newer Keras optimizers.
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
# Number of epochs used by the training cell that follows.
initial_epochs = 10

# Baseline loss and accuracy on the validation set, measured before fitting.
loss0, accuracy0 = model.evaluate(val_ds)

In [None]:
# Train for `initial_epochs` epochs on the training set, evaluating on the
# validation set after each epoch; `history` keeps the object returned by fit.
history = model.fit(train_ds,
                    epochs=initial_epochs,
                    validation_data=val_ds)

# Saving The Model

In [None]:
# Save the trained model under the path "image_model_v2" (no file extension).
model.save("image_model_v2")

In [None]:
# Save the same model a second time, to the file "image_model_v2.h5".
model.save("image_model_v2.h5")