## First approach

In [1]:
# Required modules
import cv2
import numpy as np
import matplotlib.pyplot as plt

min_YCrCb = np.array([0,133,77],np.uint8)
max_YCrCb = np.array([235,173,127],np.uint8)

# Get pointer to video frames from primary device
image = cv2.imread("filename")
imageYCrCb = cv2.cvtColor(image,cv2.COLOR_BGR2YCR_CB)
skinRegionYCrCb = cv2.inRange(imageYCrCb,min_YCrCb,max_YCrCb)

skinYCrCb = cv2.bitwise_and(image, image, mask = skinRegionYCrCb)

cv2.imwrite("filename_filtered.png", np.hstack([image,skinYCrCb]))

error: OpenCV(4.1.0) C:\projects\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


## Second approach

In [1]:
# Required modules
import cv2
import numpy as np
import matplotlib.pyplot as plt

min_HSV = np.array([0, 58, 30], dtype = "uint8")
max_HSV = np.array([33, 255, 255], dtype = "uint8")
# Get pointer to video frames from primary device
image = cv2.imread("filename")
imageHSV = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
skinRegionHSV = cv2.inRange(imageHSV, min_HSV, max_HSV)

skinHSV = cv2.bitwise_and(image, image, mask = skinRegionHSV)

# print(skinHSV/image)

cv2.imwrite("filename_filtered.png", np.hstack([image, skinHSV]))

error: OpenCV(4.3.0) C:\projects\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


## Calculate ratio of skin to photo

In [None]:
img = cv2.imread('filename')
grid_HSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# skin colour
min_HSV = np.array([0, 58, 30], dtype = "uint8")
max_HSV = np.array([33, 255, 255], dtype = "uint8")

mask= cv2.inRange(grid_HSV, min_HSV, max_HSV)

# find the ratio
ratio = cv2.countNonZero(mask)/(img.size/3)
print('brown pixel percentage:', np.round(ratio*100, 3))

## Script to download the top x post of an instagram profile

In [9]:
# From the script below you will get 2 folders(one with the name of highest and another with the name of lowest). 
# Inside these 2 folders there will be images of those posts downloaded and also a csv file that contains captions, url and number of likes and comments
import csv 
from itertools import islice
from math import ceil
from datetime import datetime
from itertools import dropwhile, takewhile

from instaloader import Instaloader, Profile

# function to scrape
def scrape(name, percentage):
    PROFILE = name        # profile to download from
    X_percentage = percentage    # percentage of posts that should be downloaded

    L = Instaloader()

    
    profile = Profile.from_username(L.context, PROFILE)
    posts = profile.get_posts()
    
    posts_sorted_by_likes = sorted(posts,
                                   key=lambda p: p.likes + p.comments,
                                   reverse=True)  # false means that the order is starting from the least popular post

    # need to write number of likes and comments to csv
    # change the folder name from lowest to highest to get the top posts
    with open('highest/'+PROFILE+'.csv', 'w',encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["caption","urlToPhoto","numOfLikes","numOfComments","pic_name"])
        i = 0
        for post in islice(posts_sorted_by_likes, ceil(profile.mediacount * X_percentage / 100)):
            L.download_pic('highest/',post.url, post.date_local, str(i))  # download the picture
            writer.writerow([post.caption,post.url,post.likes, post.comments,str(i)])
            i += 1
    

In [11]:
scrape("cathrynli",20)  # top 20%

highest/_0.jpg highest/_1.jpg highest/_2.jpg highest/_3.jpg highest/_4.jpg highest/_5.jpg highest/_6.jpg highest/_7.jpg highest/_8.jpg highest/_9.jpg highest/_10.jpg highest/_11.jpg highest/_12.jpg highest/_13.jpg highest/_14.jpg highest/_15.jpg highest/_16.jpg highest/_17.jpg highest/_18.jpg highest/_19.jpg highest/_20.jpg highest/_21.jpg highest/_22.jpg highest/_23.jpg highest/_24.jpg highest/_25.jpg highest/_26.jpg highest/_27.jpg highest/_28.jpg highest/_29.jpg highest/_30.jpg highest/_31.jpg highest/_32.jpg highest/_33.jpg highest/_34.jpg highest/_35.jpg highest/_36.jpg highest/_37.jpg highest/_38.jpg highest/_39.jpg highest/_40.jpg highest/_41.jpg highest/_42.jpg highest/_43.jpg highest/_44.jpg highest/_45.jpg highest/_46.jpg highest/_47.jpg highest/_48.jpg highest/_49.jpg highest/_50.jpg highest/_51.jpg highest/_52.jpg highest/_53.jpg highest/_54.jpg highest/_55.jpg highest/_56.jpg highest/_57.jpg highest/_58.jpg highest/_59.jpg highest/_60.jpg highest/_61.jpg highest/_62.jpg hi

## Get the ratio of skin to picture for all pictures inside the folder

In [4]:
# You will get a list that contains the percentage of skin in that image.
# Feel free to change the colour of the skin
# change the directory to lowest so that you can get another list 
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
directory = "highest"
lst = []  # lists that stores the percentages
for filename in os.listdir(directory):
    if filename[-3:] == "jpg":
        img = cv2.imread("highest/"+filename)
        grid_HSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # skin colour
        min_HSV = np.array([0, 58, 30], dtype = "uint8")
        max_HSV = np.array([33, 255, 255], dtype = "uint8")

        mask= cv2.inRange(grid_HSV, min_HSV, max_HSV)

        # find the ratio
        ratio = cv2.countNonZero(mask)/(img.size/3)
        lst.append(ratio*100)

        print(lst)
# you can then use the list to do some visualisations

[42.375651041666664]
[42.375651041666664, 24.383487654320987]
[42.375651041666664, 24.383487654320987, 18.214506172839506]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496913580247]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496913580247, 15.196825805029482]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496913580247, 15.196825805029482, 15.078858024691359]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.27799479166666

[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496913580247, 15.196825805029482, 15.078858024691359, 54.80432098765432, 20.07345920138889, 19.75817901234568, 35.45518335468912, 31.680246913580245, 51.65972222222223, 27.49135802469136, 7.932677469135803, 12.936878109452737, 51.51010301546335, 15.213955026455025, 28.99684499314129, 40.05902777777778, 6.323751475715894, 65.2766772747938, 11.852280521262003, 38.06280864197531, 26.402314814814815, 35.61805555555556, 8.06564670138889, 49.41851128472222, 44.85246913580247]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496913580247, 15.196825805029482, 15.078858024691359, 54.80432098765432, 20.07345920138889, 19.75817901234568, 35.45518335468912, 31.680246913580245, 51.65972222222223, 27.49135802469136, 7.932677469135803, 12.936878109452737, 51.51010301546335, 15.213955026455025, 28.996844

[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496913580247, 15.196825805029482, 15.078858024691359, 54.80432098765432, 20.07345920138889, 19.75817901234568, 35.45518335468912, 31.680246913580245, 51.65972222222223, 27.49135802469136, 7.932677469135803, 12.936878109452737, 51.51010301546335, 15.213955026455025, 28.99684499314129, 40.05902777777778, 6.323751475715894, 65.2766772747938, 11.852280521262003, 38.06280864197531, 26.402314814814815, 35.61805555555556, 8.06564670138889, 49.41851128472222, 44.85246913580247, 15.930401234567901, 31.202400548696847, 33.82685185185185, 35.26273148148148, 36.066000485297685, 19.197751322751323, 59.09773662551441, 18.765854895991883, 35.44460903494507, 21.45757378472222, 58.30023871527777, 16.224316578483243, 28.56721536351166, 38.68364197530864]
[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496

[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496913580247, 15.196825805029482, 15.078858024691359, 54.80432098765432, 20.07345920138889, 19.75817901234568, 35.45518335468912, 31.680246913580245, 51.65972222222223, 27.49135802469136, 7.932677469135803, 12.936878109452737, 51.51010301546335, 15.213955026455025, 28.99684499314129, 40.05902777777778, 6.323751475715894, 65.2766772747938, 11.852280521262003, 38.06280864197531, 26.402314814814815, 35.61805555555556, 8.06564670138889, 49.41851128472222, 44.85246913580247, 15.930401234567901, 31.202400548696847, 33.82685185185185, 35.26273148148148, 36.066000485297685, 19.197751322751323, 59.09773662551441, 18.765854895991883, 35.44460903494507, 21.45757378472222, 58.30023871527777, 16.224316578483243, 28.56721536351166, 38.68364197530864, 5.419135802469135, 37.16097393689986, 12.87448559670782, 20.543774801587304, 28.35329218106996, 25.02853732638889, 16.7067485143

[42.375651041666664, 24.383487654320987, 18.214506172839506, 18.748070987654323, 34.277994791666664, 7.971720278525353, 21.36496913580247, 15.196825805029482, 15.078858024691359, 54.80432098765432, 20.07345920138889, 19.75817901234568, 35.45518335468912, 31.680246913580245, 51.65972222222223, 27.49135802469136, 7.932677469135803, 12.936878109452737, 51.51010301546335, 15.213955026455025, 28.99684499314129, 40.05902777777778, 6.323751475715894, 65.2766772747938, 11.852280521262003, 38.06280864197531, 26.402314814814815, 35.61805555555556, 8.06564670138889, 49.41851128472222, 44.85246913580247, 15.930401234567901, 31.202400548696847, 33.82685185185185, 35.26273148148148, 36.066000485297685, 19.197751322751323, 59.09773662551441, 18.765854895991883, 35.44460903494507, 21.45757378472222, 58.30023871527777, 16.224316578483243, 28.56721536351166, 38.68364197530864, 5.419135802469135, 37.16097393689986, 12.87448559670782, 20.543774801587304, 28.35329218106996, 25.02853732638889, 16.7067485143

In [5]:
lst

[42.375651041666664,
 24.383487654320987,
 18.214506172839506,
 18.748070987654323,
 34.277994791666664,
 7.971720278525353,
 21.36496913580247,
 15.196825805029482,
 15.078858024691359,
 54.80432098765432,
 20.07345920138889,
 19.75817901234568,
 35.45518335468912,
 31.680246913580245,
 51.65972222222223,
 27.49135802469136,
 7.932677469135803,
 12.936878109452737,
 51.51010301546335,
 15.213955026455025,
 28.99684499314129,
 40.05902777777778,
 6.323751475715894,
 65.2766772747938,
 11.852280521262003,
 38.06280864197531,
 26.402314814814815,
 35.61805555555556,
 8.06564670138889,
 49.41851128472222,
 44.85246913580247,
 15.930401234567901,
 31.202400548696847,
 33.82685185185185,
 35.26273148148148,
 36.066000485297685,
 19.197751322751323,
 59.09773662551441,
 18.765854895991883,
 35.44460903494507,
 21.45757378472222,
 58.30023871527777,
 16.224316578483243,
 28.56721536351166,
 38.68364197530864,
 5.419135802469135,
 37.16097393689986,
 12.87448559670782,
 20.543774801587304,
 28