In [None]:
#install openCv
!pip install opencv-contrib-python==4.1.2.30 --user



In [None]:
#import all required libraries
import cv2 
import glob
import pandas as pd
import numpy as np
import pywt
import re
import statsmodels.api as sm

  import pandas.util.testing as tm


# (1) run this code to enable the function to get color attributes

In [None]:

def brightness(opened_file):
  Bright = cv2.cvtColor(opened_file, cv2.COLOR_BGR2GRAY)

  return round(Bright.mean(), 2)

def saturation(opened_file):
  hsv = cv2.cvtColor(opened_file, cv2.COLOR_BGR2HSV)

  # saturation is the s channel
  s = hsv[:, :, 1]

  return round(s.mean(), 2)

def contrast_of_brightness(opened_file):
  gray = cv2.cvtColor(opened_file, cv2.COLOR_BGR2GRAY)

  return round(gray.std(), 2)


def image_clarity(opened_file):
  gray = cv2.cvtColor(opened_file, cv2.COLOR_BGR2GRAY) 
  bright = gray >= .7

  return round(bright.sum() / bright.size, 2)
  
def warm_hue(opened_file):
  hsv = cv2.cvtColor(opened_file, cv2.COLOR_BGR2HSV)

  # hue is the h channel
  h = hsv[:, :, 0]
  warm = ~ (h > 30) & (h < 210)

  return round(warm.sum() / warm.size, 2)

def image_colorfulness(opened_file):
  # split the image into its respective RGB components
  (B, G, R) = cv2.split(opened_file.astype("float"))

  # compute rg = R - G
  rg = np.absolute(R - G)

  # compute yb = 0.5 * (R + G) - B
  yb = np.absolute(0.5 * (R + G) - B)

  # compute the mean and standard deviation of both `rg` and `yb`
  (rbMean, rbStd) = (np.mean(rg), np.std(rg))
  (ybMean, ybStd) = (np.mean(yb), np.std(yb))

  # combine the mean and standard deviations
  std_root = np.sqrt((rbStd ** 2) + (ybStd ** 2))
  mean_root = np.sqrt((rbMean ** 2) + (ybMean ** 2))

  # derive the "colorfulness" metric and return it
  return round(std_root + (0.3 * mean_root))

(2) composition

In [None]:
def Composition_variables (image):
    # this function returns the vaules corresponding to
    # 1- Rule of third
    # 2- Diagonal dominance 
    # 3- Vertical physical visual balance
    # 4- Horizontal physical visual balance
    
    
    #__________________________________________________________
    # Rule of third
    saliency = cv2.saliency.StaticSaliencyFineGrained_create()
    (success, saliencyMap) = saliency.computeSaliency(image)
    new_arr = ((saliencyMap - saliencyMap.min()) * (1/(saliencyMap.max() - saliencyMap.min()) * 255))
    threshMap = cv2.threshold(new_arr.astype('uint8'), 0, 255, cv2.THRESH_OTSU)[1]
    contours, hierarchy = cv2.findContours(threshMap,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
    img_copy = image.copy()
    fx = 0
    fy = 0
    for c in contours:
       # calculate moments for each contour
        M = cv2.moments(c)
        area = cv2.contourArea(c)
        #img_copy = cv2.drawContours(img_copy, contours, contourIdx = -1, 
        #                     color = (255, 0, 0), thickness = 2)
        #print(area)
       # calculate x,y coordinate of center

        if M["m00"]!= 0.0:

            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
            fx= fx + cX
            fy = fy + cY

    fx = round(fx/len(contours))
    fy = round(fy/len(contours)) 
    #cv2.circle(image, (fx, fy), 5, (255, 255, 255), -1)
    #cv2.putText(image, "centroid", (fx - 25, fy - 25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
    y,x,c = image.shape
    P = [[y/3, x/3],[y/3, 2*x/3],[2*y/3, x/3],[2*y/3, 2*x/3]]
    dist =[]
    for p in P:
        d = ((fy-p[0])**2+(fx-p[1])**2)**.5
        dist.append(d)
    
    #__________________________________________________________
    # Diagonal dominance 
    xc = fx
    yc = fy

    ycc = y-yc
    yccp = xc*y/x
    theta = np.arctan(y/x)
    d1 = abs((ycc-yccp)*np.cos(theta))
    ycc,yccp,theta,d1

    xcp = x - (x/y*ycc)
    d2 = abs((xc-xcp)*np.sin(theta))
    
    #__________________________________________________________
    #  physical visual balance 
    
    ver_balance = -abs(y/2-fy)
    hor_balance = -abs(x/2-fx)

    return (min(dist), min(d1,d2),-ver_balance,-hor_balance)
  
def horizontal_color_balance(opened_file):
  mid = int(opened_file.shape[1] / 2)
  left_half = np.array(opened_file[:, 0:mid, ], dtype='int')
  right_half = np.flip(np.array(opened_file[:, mid:2 * mid, ],
                                      dtype='int'), axis=1)
  dif_square = np.square(left_half - right_half)
  euclidean = np.sqrt(dif_square.sum(axis=2))

  return round(euclidean.mean(), 2)

def vertical_color_balance(opened_file):
  mid = int(opened_file.shape[0] / 2)
  upper_half = np.array(opened_file[0:mid,: , ], dtype='int')
  lower_half = np.flip(np.array(opened_file[mid:2 * mid,: , ],
                                      dtype='int'), axis=1)
  dif_square = np.square(upper_half - lower_half)
  euclidean = np.sqrt(dif_square.sum(axis=2))

  return round(euclidean.mean(), 2)

(3) figure-background relationshp

In [None]:
def Difference(opened_file):
    
    #resize image to reduce processing time 
    opened_file = cv2.resize(opened_file, (0, 0), fx=0.5, fy=0.5)
    
    # initialize OpenCV's static fine grained saliency detector and
    # compute the saliency map

    #image = cv2.imread('C:\\Users\\oalq0001\\Downloads\\saliency-detection\\images\\players.jpg')
    saliency = cv2.saliency.StaticSaliencyFineGrained_create()
    (success, saliencyMap) = saliency.computeSaliency(opened_file)
    # change the scale into 0-255
    new_arr = ((saliencyMap - saliencyMap.min()) * (1/(saliencyMap.max() - saliencyMap.min()) * 255))

  
    threshMap = cv2.threshold(new_arr.astype('uint8'), 0, 255, cv2.THRESH_OTSU)[1]

    #display objects
    #cv2.imshow("Image", threshMap)
    #cv2.waitKey(0)
    
    kernel1 = np.ones((40,40),np.uint8)
    kernel2 = np.ones((20,20),np.uint8)

    closing = cv2.morphologyEx(threshMap, cv2.MORPH_CLOSE, kernel1)
    threshMap = cv2.erode(closing,kernel2,iterations = 1)

    # calculate Area difference
    objects_size = np.sum(threshMap/255)
    background_size = (threshMap.shape[0]*threshMap.shape[1]) - objects_size
    overall_size = threshMap.shape[0]*threshMap.shape[1]
    
    Area_difference = (objects_size )/(threshMap.shape[0]*threshMap.shape[1])
    
    # calculate Color
    Objects_image = cv2.bitwise_and(opened_file,opened_file,mask = (threshMap/255).astype('uint8'))
    Background_image = cv2.bitwise_and(opened_file,opened_file,mask = ~(threshMap).astype('uint8'))
    
    R_Obj = np.sum(Objects_image[:,:,0])/objects_size
    G_Obj = np.sum(Objects_image[:,:,1])/objects_size
    B_Obj = np.sum(Objects_image[:,:,2])/objects_size
    
    R_back = np.sum(Background_image[:,:,0])/background_size
    G_back = np.sum(Background_image[:,:,1])/background_size
    B_back = np.sum(Background_image[:,:,2])/background_size
    
    color_object = str(R_Obj)+','+str(G_Obj)+','+str(B_Obj)
    color_Background = str(R_back)+','+str(G_back)+','+str(B_back)
    
    # calculate Color difference
    
    Color_difference = ((R_Obj - R_back)**2 + (G_Obj - G_back)**2 + (B_Obj - B_back)**2)**.5
    
    #calculate Texture difference
    edges_objects = cv2.Canny(Objects_image, 50, 150, apertureSize=3)
    edges_background = cv2.Canny(Background_image, 50, 150, apertureSize=3)

    edges_objects_density = np.sum(edges_objects/255)/objects_size
    edges_background_density = np.sum(edges_background/255)/background_size

    Texture_difference = abs(edges_background_density - edges_objects_density)
      
    
    return (Texture_difference,Area_difference, Color_difference)
  
def calc_wavelet (block):
    hsv = cv2.cvtColor(block, cv2.COLOR_BGR2HSV)
    sums =[]
    for i in range(3):
        Channel = hsv[:,:,i]
        coffes_H=pywt.dwt2(Channel,'db1')
        ca,(ch,cv,cd)= coffes_H
        sums.append (np.sum(cd))
    return sums


def Depth_of_field (image):
    
    # Define the window size
    windowsize_r = round(image.shape[0]/4)
    windowsize_c = round(image.shape[1]/4)

    # devide the image into blocks
    window = np.zeros(shape=(windowsize_r,windowsize_c,3,16)).astype('uint8')
    k = 0
    for r in range(0,image.shape[0] - windowsize_r, windowsize_r):
        for c in range(0,image.shape[1] - windowsize_c, windowsize_c):
            window[:,:,:,k] = image[r:r+windowsize_r,c:c+windowsize_c]
            k = k +1
            #display objects

    #center blocks
    center = [5,6,9,10]

    #calaculate the wavelet coeff. for center blocks
    sum_center_blocks = np.zeros(shape = [3])
    for i in center:
        block = window[:,:,:,i]
        Sums = calc_wavelet (block)
        sum_center_blocks = sum_center_blocks + Sums

    #calaculate the wavelet coeff. for whole image 
    Sum_whole_image = calc_wavelet (image)

    #return  the depth of feild for each channel (H,S, and V)
    return abs(sum_center_blocks/Sum_whole_image)


In [None]:
import glob

In [None]:
# make sure to put the right path. 

list_of_files_loc = glob.glob('/content/drive/My Drive/Instagram_Images.jpg/*.*')
list_of_files = [x for x in list_of_files_loc if re.search('jpg',x)]#if your pictures are not .jpg, then please change ".jpg" to other type

In [None]:
len(list_of_files_loc)

1261

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#check if you get all the pictures
len(list_of_files) #if you have 1000 pictures, then the length of this list of files should be 1000

1261

In [None]:
#this is the function to extract all 18 features from one picture
def collect_features (filename):
    image = cv2.imread(filename)
    id = filename.split("/")[-1].split('.')[0]
    (Depth_of_field_hue, Depth_of_field_saturation, Depth_of_field_saturation_value) = Depth_of_field (image)
    Brightness = brightness (image)
    Saturation = saturation (image)
    Contrast = contrast_of_brightness (image)
    Clarity = image_clarity (image)
    Warm_hue = warm_hue (image)
    Colorfulness = image_colorfulness (image)
    hor_color = horizontal_color_balance(image)
    ver_color = vertical_color_balance(image)
    (Rule_third, diagonal_dom, physical_ver, physical_hor) = Composition_variables(image)
    (Texture_difference,Area_difference, Color_difference) = Difference(image)
    

    #we extract all the visual features from one picture 
    visual_features  ={'short_code':id,
               'Colorfulness_Picture':Colorfulness,
               'Saturation_Picture': Saturation, 
               'Contrast_Picture':Contrast,
               'Clarity_Picture':Clarity,
               'Brightness_Picture':Brightness,
               'Warm Hue_Picture':Warm_hue,
               
               'ROT_Picture':Rule_third,
               'Diagonal_dominance_Picture':diagonal_dom,
               'Physical_ver_Picture':physical_ver,
               'Physical_Hor_Picture':physical_hor,
               'Color_balance_vertical_Picture':ver_color,
               'Color_balance_horizontal_Picture':hor_color,
               
               
               'Texture_difference_Picture':Texture_difference,
               'Size_difference_Picture':Area_difference,
               'Color_difference_Picture':Color_difference,
               'Depth of field_hue_Picture':Depth_of_field_hue,
               'Depth of field_saturation_Picture':Depth_of_field_saturation,
               'Depth of field_value_Picture':Depth_of_field_saturation_value,
               
              }
    
    
    return visual_features
        

In [None]:

all_features_combined = []
#loop over list_of_files
for i,filename in enumerate (list_of_files):
    print('processing image no.:',i)
    # send the image name to collect_features function append then results to list
    try:
      all_features = collect_features (filename)
      all_features_combined.append(all_features)
    except:
      continue
     
    
   

processing image no.: 0
processing image no.: 1
processing image no.: 2
processing image no.: 3
processing image no.: 4
processing image no.: 5
processing image no.: 6
processing image no.: 7
processing image no.: 8
processing image no.: 9
processing image no.: 10
processing image no.: 11
processing image no.: 12
processing image no.: 13
processing image no.: 14
processing image no.: 15
processing image no.: 16
processing image no.: 17
processing image no.: 18
processing image no.: 19
processing image no.: 20
processing image no.: 21
processing image no.: 22
processing image no.: 23
processing image no.: 24
processing image no.: 25
processing image no.: 26
processing image no.: 27
processing image no.: 28
processing image no.: 29
processing image no.: 30
processing image no.: 31
processing image no.: 32
processing image no.: 33
processing image no.: 34
processing image no.: 35
processing image no.: 36
processing image no.: 37
processing image no.: 38
processing image no.: 39
processing



processing image no.: 119
processing image no.: 120
processing image no.: 121
processing image no.: 122
processing image no.: 123
processing image no.: 124
processing image no.: 125
processing image no.: 126
processing image no.: 127
processing image no.: 128
processing image no.: 129
processing image no.: 130
processing image no.: 131
processing image no.: 132
processing image no.: 133
processing image no.: 134
processing image no.: 135
processing image no.: 136
processing image no.: 137
processing image no.: 138
processing image no.: 139
processing image no.: 140
processing image no.: 141
processing image no.: 142
processing image no.: 143
processing image no.: 144
processing image no.: 145
processing image no.: 146
processing image no.: 147
processing image no.: 148
processing image no.: 149
processing image no.: 150
processing image no.: 151
processing image no.: 152
processing image no.: 153
processing image no.: 154
processing image no.: 155
processing image no.: 156
processing i



processing image no.: 233
processing image no.: 234
processing image no.: 235
processing image no.: 236
processing image no.: 237
processing image no.: 238
processing image no.: 239
processing image no.: 240
processing image no.: 241
processing image no.: 242
processing image no.: 243
processing image no.: 244
processing image no.: 245
processing image no.: 246
processing image no.: 247
processing image no.: 248
processing image no.: 249
processing image no.: 250
processing image no.: 251
processing image no.: 252
processing image no.: 253
processing image no.: 254
processing image no.: 255
processing image no.: 256
processing image no.: 257
processing image no.: 258
processing image no.: 259
processing image no.: 260
processing image no.: 261
processing image no.: 262
processing image no.: 263
processing image no.: 264
processing image no.: 265
processing image no.: 266
processing image no.: 267
processing image no.: 268
processing image no.: 269
processing image no.: 270
processing i

In [None]:
#check your visual features
all_features_combined[:5]

[{'Brightness_Picture': 60.65,
  'Clarity_Picture': 1.0,
  'Color_balance_horizontal_Picture': 57.83,
  'Color_balance_vertical_Picture': 88.24,
  'Color_difference_Picture': 73.27166101254501,
  'Colorfulness_Picture': 34.0,
  'Contrast_Picture': 45.96,
  'Depth of field_hue_Picture': 3.4948453608247325,
  'Depth of field_saturation_Picture': 0.6453215567209489,
  'Depth of field_value_Picture': 0.21364985163205186,
  'Diagonal_dominance_Picture': 28.2842712474619,
  'Physical_Hor_Picture': 46.0,
  'Physical_ver_Picture': 86.0,
  'ROT_Picture': 58.240879114244144,
  'Saturation_Picture': 136.43,
  'Size_difference_Picture': 0.391996667948225,
  'Texture_difference_Picture': 0.0693572859874101,
  'Warm Hue_Picture': 0.95,
  'short_code': '342'},
 {'Brightness_Picture': 76.72,
  'Clarity_Picture': 1.0,
  'Color_balance_horizontal_Picture': 99.52,
  'Color_balance_vertical_Picture': 95.74,
  'Color_difference_Picture': 77.82612691667529,
  'Colorfulness_Picture': 42.0,
  'Contrast_Pictur

In [None]:
import pandas as pd

In [None]:
# save the results in a DataFrame
visual_data = pd.DataFrame(all_features_combined).astype(np.float64)

In [None]:
#check our visual data
visual_data.head()

Unnamed: 0,short_code,Colorfulness_Picture,Saturation_Picture,Contrast_Picture,Clarity_Picture,Brightness_Picture,Warm Hue_Picture,ROT_Picture,Diagonal_dominance_Picture,Physical_ver_Picture,Physical_Hor_Picture,Color_balance_vertical_Picture,Color_balance_horizontal_Picture,Texture_difference_Picture,Size_difference_Picture,Color_difference_Picture,Depth of field_hue_Picture,Depth of field_saturation_Picture,Depth of field_value_Picture
0,342.0,34.0,136.43,45.96,1.0,60.65,0.95,58.240879,28.284271,86.0,46.0,88.24,57.83,0.069357,0.391997,73.271661,3.494845,0.645322,0.21365
1,343.0,42.0,169.02,52.41,1.0,76.72,0.93,103.827314,2.950363,68.0,93.0,95.74,99.52,0.106102,0.280105,77.826127,0.108454,0.015326,0.378378
2,346.0,25.0,101.3,53.58,1.0,57.69,0.11,101.965681,88.752031,166.0,89.0,104.16,71.45,0.160821,0.267258,118.805706,1.717448,0.39188,0.161826
3,348.0,57.0,176.07,59.06,1.0,98.85,0.93,191.253758,11.313708,37.0,53.0,116.01,83.18,0.10204,0.302761,39.552645,0.03644,0.09943,0.276471
4,350.0,51.0,84.33,66.88,1.0,132.42,0.35,229.836899,3.535534,20.0,15.0,135.02,138.5,0.094689,0.429791,43.58982,0.64378,0.056349,1.047619


In [None]:
#then, you can download your visual_data dataframe. use this code to download the the dataframe. 
from google.colab import files

visual_data.to_csv("visual_features_fromOpenCV.csv", index=False)
files.download('visual_features_fromOpenCV.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import files
uploaded = files.upload()

Saving Nespresso (1).xlsx to Nespresso (1).xlsx


In [None]:
#import the pictures data with likes
likes_data = pd.read_excel("Nespresso (1).xlsx").loc[:,["short_code","likeCount"]]

In [None]:
#check your data
likes_data.head()

Unnamed: 0,short_code,likeCount
0,342,667.0
1,343,3297.0
2,346,4483.0
3,348,4187.0
4,350,4935.0


In [None]:
#merge the data
data_ready = visual_data.merge(likes_data, on="short_code", how="inner").fillna(0)

In [None]:
#start the regression
y = data_ready['likeCount']#your Y / dependent variable
x = data_ready.drop(['likeCount'], axis=1).astype(float) #your X/ Independent variabels
x = sm.add_constant(x)
model= sm.OLS(y, x).fit()

In [None]:
#start the regression
y = data_ready['eng_score']#your Y / dependent variable
x = data_ready.drop(['eng_score'], axis=1).astype(float) #your X/ Independent variabels
x = sm.add_constant(x)
model2= sm.OLS(y, x).fit()

In [None]:
model2.summary()

0,1,2,3
Dep. Variable:,eng_score,R-squared:,0.089
Model:,OLS,Adj. R-squared:,0.075
Method:,Least Squares,F-statistic:,6.403
Date:,"Fri, 14 Aug 2020",Prob (F-statistic):,4.3e-16
Time:,05:04:33,Log-Likelihood:,953.35
No. Observations:,1261,AIC:,-1867.0
Df Residuals:,1241,BIC:,-1764.0
Df Model:,19,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0899,0.071,-1.260,0.208,-0.230,0.050
short_code,7.334e-05,8.06e-06,9.098,0.000,5.75e-05,8.92e-05
Colorfulness_Picture,0.0005,0.000,1.898,0.058,-1.74e-05,0.001
Saturation_Picture,-0.0002,0.000,-1.647,0.100,-0.001,4.56e-05
Contrast_Picture,-0.0006,0.000,-1.187,0.235,-0.002,0.000
Clarity_Picture,0.1205,0.068,1.771,0.077,-0.013,0.254
Brightness_Picture,-0.0001,0.000,-1.058,0.290,-0.000,0.000
Warm Hue_Picture,-0.0030,0.013,-0.231,0.817,-0.029,0.023
ROT_Picture,0.0003,8.33e-05,3.156,0.002,9.94e-05,0.000

0,1,2,3
Omnibus:,548.54,Durbin-Watson:,1.164
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3258.012
Skew:,1.938,Prob(JB):,0.0
Kurtosis:,9.855,Cond. No.,29700.0


In [None]:
model.summary()

0,1,2,3
Dep. Variable:,likeCount,R-squared:,0.142
Model:,OLS,Adj. R-squared:,0.129
Method:,Least Squares,F-statistic:,10.83
Date:,"Fri, 14 Aug 2020",Prob (F-statistic):,1.5099999999999999e-30
Time:,04:40:42,Log-Likelihood:,-11884.0
No. Observations:,1261,AIC:,23810.0
Df Residuals:,1241,BIC:,23910.0
Df Model:,19,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3133.0100,1880.924,-1.666,0.096,-6823.152,557.132
short_code,2.5815,0.213,12.139,0.000,2.164,2.999
Colorfulness_Picture,17.3791,7.156,2.429,0.015,3.340,31.418
Saturation_Picture,-7.1022,3.818,-1.860,0.063,-14.593,0.388
Contrast_Picture,-20.6480,12.796,-1.614,0.107,-45.751,4.455
Clarity_Picture,3831.2924,1795.509,2.134,0.033,308.723,7353.862
Brightness_Picture,-3.7355,2.959,-1.262,0.207,-9.541,2.070
Warm Hue_Picture,-35.5868,343.785,-0.104,0.918,-710.051,638.878
ROT_Picture,8.5176,2.197,3.878,0.000,4.208,12.827

0,1,2,3
Omnibus:,184.637,Durbin-Watson:,0.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,322.564
Skew:,0.929,Prob(JB):,9.04e-71
Kurtosis:,4.638,Cond. No.,29700.0


In [None]:
#interpret the regression
df_coeffs = pd.DataFrame({"Coefficients": model.params, "p": model.pvalues})

df_coeffs_sorted = df_coeffs.reindex(df_coeffs["p"].sort_values().index)
df_coeffs_sorted.style.bar(subset = ["Coefficients", "p"], align='mid', color=['#d65f5f', '#5fba7d'])

Unnamed: 0,Coefficients,p
short_code,2.581486,0.0
Physical_ver_Picture,7.047779,8.1e-05
ROT_Picture,8.517649,0.000111
Physical_Hor_Picture,5.402078,0.004462
Colorfulness_Picture,17.379083,0.015297
Color_balance_horizontal_Picture,8.314086,0.016275
Diagonal_dominance_Picture,-7.93819,0.017663
Clarity_Picture,3831.29242,0.033053
Saturation_Picture,-7.102175,0.063104
Texture_difference_Picture,3605.681076,0.07305


In [None]:
model.pvalues[model.pvalues<0.05]


short_code                          3.996099e-32
Colorfulness_Picture                1.529669e-02
Clarity_Picture                     3.305318e-02
ROT_Picture                         1.109471e-04
Diagonal_dominance_Picture          1.766349e-02
Physical_ver_Picture                8.112892e-05
Physical_Hor_Picture                4.462117e-03
Color_balance_horizontal_Picture    1.627492e-02
dtype: float64

In [None]:
model.params[model.pvalues<0.05]

short_code                             2.581486
Colorfulness_Picture                  17.379083
Clarity_Picture                     3831.292420
ROT_Picture                            8.517649
Diagonal_dominance_Picture            -7.938190
Physical_ver_Picture                   7.047779
Physical_Hor_Picture                   5.402078
Color_balance_horizontal_Picture       8.314086
dtype: float64

In [None]:
df_coeffs = pd.DataFrame({"Coefficients": model.params[model.pvalues<0.05], "p": model.pvalues[model.pvalues<0.05]})

df_coeffs_sorted = df_coeffs.reindex(df_coeffs["p"].sort_values().index)
df_coeffs_sorted.style.bar(subset = ["Coefficients", "p"], align='mid', color=['#d65f5f', '#5fba7d'])

Unnamed: 0,Coefficients,p
short_code,2.581486,0.0
Physical_ver_Picture,7.047779,8.1e-05
ROT_Picture,8.517649,0.000111
Physical_Hor_Picture,5.402078,0.004462
Colorfulness_Picture,17.379083,0.015297
Color_balance_horizontal_Picture,8.314086,0.016275
Diagonal_dominance_Picture,-7.93819,0.017663
Clarity_Picture,3831.29242,0.033053


In [None]:
data_ready

Unnamed: 0,short_code,Colorfulness_Picture,Saturation_Picture,Contrast_Picture,Clarity_Picture,Brightness_Picture,Warm Hue_Picture,ROT_Picture,Diagonal_dominance_Picture,Physical_ver_Picture,Physical_Hor_Picture,Color_balance_vertical_Picture,Color_balance_horizontal_Picture,Texture_difference_Picture,Size_difference_Picture,Color_difference_Picture,Depth of field_hue_Picture,Depth of field_saturation_Picture,Depth of field_value_Picture,likeCount
0,342.0,34.0,136.43,45.96,1.0,60.65,0.95,58.240879,28.284271,86.0,46.0,88.24,57.83,0.069357,0.391997,73.271661,3.494845,0.645322,0.213650,667.0
1,343.0,42.0,169.02,52.41,1.0,76.72,0.93,103.827314,2.950363,68.0,93.0,95.74,99.52,0.106102,0.280105,77.826127,0.108454,0.015326,0.378378,3297.0
2,346.0,25.0,101.30,53.58,1.0,57.69,0.11,101.965681,88.752031,166.0,89.0,104.16,71.45,0.160821,0.267258,118.805706,1.717448,0.391880,0.161826,4483.0
3,348.0,57.0,176.07,59.06,1.0,98.85,0.93,191.253758,11.313708,37.0,53.0,116.01,83.18,0.102040,0.302761,39.552645,0.036440,0.099430,0.276471,4187.0
4,350.0,51.0,84.33,66.88,1.0,132.42,0.35,229.836899,3.535534,20.0,15.0,135.02,138.50,0.094689,0.429791,43.589820,0.643780,0.056349,1.047619,4935.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1256,336.0,34.0,70.69,66.72,1.0,123.01,0.48,101.162685,36.062446,82.5,133.5,152.37,139.07,0.046851,0.595820,46.518927,0.169162,2.483023,0.081839,0.0
1257,337.0,33.0,60.49,81.07,1.0,145.53,0.41,200.375204,14.142136,43.5,23.5,186.56,137.31,0.062193,0.443327,63.282606,0.036158,3.091581,1.140461,0.0
1258,338.0,15.0,33.36,61.03,1.0,169.97,0.18,80.099938,59.396970,106.0,22.0,127.81,84.48,0.067135,0.282744,153.853491,3.163476,0.243836,2.698795,0.0
1259,339.0,58.0,96.08,66.24,1.0,137.53,0.48,153.818436,63.803117,122.5,35.0,159.84,148.07,0.153592,0.816124,148.749948,0.657812,0.678626,0.135385,0.0


In [None]:
x=data_ready[['Physical_Hor_Picture','ROT_Picture','Colorfulness_Picture','Color_balance_horizontal_Picture','Diagonal_dominance_Picture','Clarity_Picture']]

In [None]:
y = data_ready['likeCount']#your Y / dependent variable
x =x #your X/ Independent variabels
x = sm.add_constant(x)
model1= sm.OLS(y, x).fit()

In [None]:
model1.summary()

0,1,2,3
Dep. Variable:,likeCount,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,3.452
Date:,"Fri, 14 Aug 2020",Prob (F-statistic):,0.00219
Time:,05:01:13,Log-Likelihood:,-11970.0
No. Observations:,1261,AIC:,23950.0
Df Residuals:,1254,BIC:,23990.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1712.1270,1750.617,0.978,0.328,-1722.334,5146.588
Physical_Hor_Picture,1.4558,1.939,0.751,0.453,-2.348,5.259
ROT_Picture,-0.5243,1.960,-0.268,0.789,-4.369,3.321
Colorfulness_Picture,20.0494,4.929,4.067,0.000,10.379,29.720
Color_balance_horizontal_Picture,-0.8933,2.719,-0.329,0.743,-6.227,4.440
Diagonal_dominance_Picture,-4.8562,3.448,-1.409,0.159,-11.620,1.908
Clarity_Picture,1235.3117,1778.164,0.695,0.487,-2253.193,4723.817

0,1,2,3
Omnibus:,177.649,Durbin-Watson:,0.845
Prob(Omnibus):,0.0,Jarque-Bera (JB):,282.745
Skew:,0.945,Prob(JB):,4e-62
Kurtosis:,4.344,Cond. No.,4990.0


In [None]:
x=data_ready[[

In [None]:
y = data_ready['eng_score']#your Y / dependent variable
x =x #your X/ Independent variabels
x = sm.add_constant(x)
model3= sm.OLS(y, x).fit()

In [None]:
model3.summary()

0,1,2,3
Dep. Variable:,eng_score,R-squared:,0.089
Model:,OLS,Adj. R-squared:,0.075
Method:,Least Squares,F-statistic:,6.403
Date:,"Fri, 14 Aug 2020",Prob (F-statistic):,4.3e-16
Time:,05:05:35,Log-Likelihood:,953.35
No. Observations:,1261,AIC:,-1867.0
Df Residuals:,1241,BIC:,-1764.0
Df Model:,19,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0899,0.071,-1.260,0.208,-0.230,0.050
short_code,7.334e-05,8.06e-06,9.098,0.000,5.75e-05,8.92e-05
Colorfulness_Picture,0.0005,0.000,1.898,0.058,-1.74e-05,0.001
Saturation_Picture,-0.0002,0.000,-1.647,0.100,-0.001,4.56e-05
Contrast_Picture,-0.0006,0.000,-1.187,0.235,-0.002,0.000
Clarity_Picture,0.1205,0.068,1.771,0.077,-0.013,0.254
Brightness_Picture,-0.0001,0.000,-1.058,0.290,-0.000,0.000
Warm Hue_Picture,-0.0030,0.013,-0.231,0.817,-0.029,0.023
ROT_Picture,0.0003,8.33e-05,3.156,0.002,9.94e-05,0.000

0,1,2,3
Omnibus:,548.54,Durbin-Watson:,1.164
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3258.012
Skew:,1.938,Prob(JB):,0.0
Kurtosis:,9.855,Cond. No.,29700.0


In [None]:
data_ready.head()

Unnamed: 0,short_code,Colorfulness_Picture,Saturation_Picture,Contrast_Picture,Clarity_Picture,Brightness_Picture,Warm Hue_Picture,ROT_Picture,Diagonal_dominance_Picture,Physical_ver_Picture,Physical_Hor_Picture,Color_balance_vertical_Picture,Color_balance_horizontal_Picture,Texture_difference_Picture,Size_difference_Picture,Color_difference_Picture,Depth of field_hue_Picture,Depth of field_saturation_Picture,Depth of field_value_Picture,likeCount
0,342.0,34.0,136.43,45.96,1.0,60.65,0.95,58.240879,28.284271,86.0,46.0,88.24,57.83,0.069357,0.391997,73.271661,3.494845,0.645322,0.21365,667.0
1,343.0,42.0,169.02,52.41,1.0,76.72,0.93,103.827314,2.950363,68.0,93.0,95.74,99.52,0.106102,0.280105,77.826127,0.108454,0.015326,0.378378,3297.0
2,346.0,25.0,101.3,53.58,1.0,57.69,0.11,101.965681,88.752031,166.0,89.0,104.16,71.45,0.160821,0.267258,118.805706,1.717448,0.39188,0.161826,4483.0
3,348.0,57.0,176.07,59.06,1.0,98.85,0.93,191.253758,11.313708,37.0,53.0,116.01,83.18,0.10204,0.302761,39.552645,0.03644,0.09943,0.276471,4187.0
4,350.0,51.0,84.33,66.88,1.0,132.42,0.35,229.836899,3.535534,20.0,15.0,135.02,138.5,0.094689,0.429791,43.58982,0.64378,0.056349,1.047619,4935.0


In [None]:
likes_data = pd.read_excel("Nespresso (1).xlsx").loc[:,["short_code","eng_score"]]

In [None]:
data_ready = visual_data.merge(likes_data, on="short_code", how="inner").fillna(0)

In [None]:
y = data_ready['eng_score']#your Y / dependent variable
x = data_ready.drop(['eng_score'], axis=1).astype(float) #your X/ Independent variabels
x = sm.add_constant(x)
model= sm.OLS(y, x).fit()

In [None]:
df_coeffs = pd.DataFrame({"Coefficients": model.params, "p": model.pvalues})
df_coeffs_sorted = df_coeffs.reindex(df_coeffs["p"].sort_values().index)
df_coeffs_sorted.style.bar(subset = ["Coefficients", "p"], align='mid', color=['#d65f5f', '#5fba7d'])

Unnamed: 0,Coefficients,p
short_code,7.3e-05,0.0
Physical_ver_Picture,0.000237,0.000457
ROT_Picture,0.000263,0.001639
Physical_Hor_Picture,0.000223,0.001972
Colorfulness_Picture,0.000515,0.057987
Color_balance_horizontal_Picture,0.000242,0.065327
Clarity_Picture,0.120542,0.076808
Diagonal_dominance_Picture,-0.000211,0.095873
Saturation_Picture,-0.000238,0.099788
Texture_difference_Picture,0.124672,0.102016


In [None]:
df_coeffs = pd.DataFrame({"Coefficients": model.params[model.pvalues<0.05], "p": model.pvalues[model.pvalues<0.05]})
df_coeffs_sorted = df_coeffs.reindex(df_coeffs["p"].sort_values().index)
df_coeffs_sorted.style.bar(subset = ["Coefficients", "p"], align='mid', color=['#d65f5f', '#5fba7d'])

Unnamed: 0,Coefficients,p
short_code,7.3e-05,0.0
Physical_ver_Picture,0.000237,0.000457
ROT_Picture,0.000263,0.001639
Physical_Hor_Picture,0.000223,0.001972


In [None]:
likes_data = pd.read_excel("Nespresso (1).xlsx").loc[:,["short_code","contains_coffee"]]

In [None]:
data_ready = visual_data.merge(likes_data, on="short_code", how="inner").fillna(0)

In [None]:
y = data_ready['contains_coffee']#your Y / dependent variable
x = data_ready.drop(['contains_coffee'], axis=1).astype(float) #your X/ Independent variabels
x = sm.add_constant(x)
model= sm.OLS(y, x).fit()

In [None]:
df_coeffs = pd.DataFrame({"Coefficients": model.params, "p": model.pvalues})
df_coeffs_sorted = df_coeffs.reindex(df_coeffs["p"].sort_values().index)
df_coeffs_sorted.style.bar(subset = ["Coefficients", "p"], align='mid', color=['#d65f5f', '#5fba7d'])

Unnamed: 0,Coefficients,p
short_code,9.6e-05,0.000109
Saturation_Picture,-0.000946,0.032974
Color_balance_vertical_Picture,-0.001044,0.04254
Colorfulness_Picture,0.001641,0.048481
Clarity_Picture,0.399222,0.055743
ROT_Picture,0.000483,0.058717
Brightness_Picture,-0.000598,0.082209
Color_balance_horizontal_Picture,0.000627,0.118688
Physical_ver_Picture,0.000314,0.128959
const,-0.322675,0.139821
