In [1]:
# Library
import pandas as pd

In [2]:
# Read data
data = pd.read_csv('social media influencers-tiktok.csv', usecols = ['Tiktoker name', 
                                                                     'Tiktok name', 
                                                                     '\nFollowers', 
                                                                     'Views (Avg.)', 
                                                                     'Views (Avg.)', 
                                                                     'Likes (Avg.)', 
                                                                     'Comments (Avg.)', 
                                                                     'Shares (Avg.)'], encoding = "ISO-8859-1")
data.head()

Unnamed: 0,Tiktoker name,Tiktok name,\nFollowers,Views (Avg.),Likes (Avg.),Comments (Avg.),Shares (Avg.)
0,,,,,,,
1,yossikramer,Yossi,177.3K,26.8M,2.2M,30.1K,71.2K
2,mrbeast,MrBeast,55.6M,38.1M,3.8M,19K,7.8K
3,adinross,adin,5.4M,16.6M,2M,10.9K,12.5K
4,daniel.labelle,Daniel LaBelle,27.9M,11.8M,1.7M,6.9K,29.3K


In [3]:
# Rename columns
data.rename(columns = {'Tiktoker name':'Username',
                       'Tiktoker name':'Name',
                       '\nFollowers':'Followers', 
                       'Views (Avg.)':'Views_Avg', 
                       'Likes (Avg.)':'Likes_Avg', 
                       'Comments (Avg.)':'Comments_Avg', 
                       'Shares (Avg.)':'Shares_Avg'}, inplace = True)

### Understanding Data

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1001 entries, 0 to 1000
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Name          1000 non-null   object
 1   Tiktok name   1000 non-null   object
 2   Followers     1000 non-null   object
 3   Views_Avg     1000 non-null   object
 4   Likes_Avg     1000 non-null   object
 5   Comments_Avg  1000 non-null   object
 6   Shares_Avg    1000 non-null   object
dtypes: object(7)
memory usage: 54.9+ KB


In [5]:
data.describe()

Unnamed: 0,Name,Tiktok name,Followers,Views_Avg,Likes_Avg,Comments_Avg,Shares_Avg
count,1000,1000,1000,1000,1000,1000,1000
unique,983,982,345,144,875,364,438
top,ibarrechejavier,Javier Ibarreche,2.1M,1.7M,1.2M,1.2K,1.1K
freq,2,2,19,56,8,48,43


### Handling Missing Value

In [6]:
data.isnull().sum()

Name            1
Tiktok name     1
Followers       1
Views_Avg       1
Likes_Avg       1
Comments_Avg    1
Shares_Avg      1
dtype: int64

In [7]:
data = data.dropna()

In [8]:
data.isnull().sum()

Name            0
Tiktok name     0
Followers       0
Views_Avg       0
Likes_Avg       0
Comments_Avg    0
Shares_Avg      0
dtype: int64

In [9]:
data

Unnamed: 0,Name,Tiktok name,Followers,Views_Avg,Likes_Avg,Comments_Avg,Shares_Avg
1,yossikramer,Yossi,177.3K,26.8M,2.2M,30.1K,71.2K
2,mrbeast,MrBeast,55.6M,38.1M,3.8M,19K,7.8K
3,adinross,adin,5.4M,16.6M,2M,10.9K,12.5K
4,daniel.labelle,Daniel LaBelle,27.9M,11.8M,1.7M,6.9K,29.3K
5,hotspanishmx,HotSpanish,8.2M,19.2M,1.7M,7.4K,9.5K
...,...,...,...,...,...,...,...
996,nathanmccarron0,Nathanâ??s Art,1.6M,1.1M,117.6K,1.4K,1.1K
997,lacha21.1,Lazaro Gonzalez,669.4K,750.3K,118.3K,655,3K
998,hyoga.x,Hyoga,552K,1M,121.9K,1.9K,727
999,rrqr7tatsumaki,Rivaldi Fataah,1.1M,2M,112.5K,1K,414


### Transformation

In [10]:
# Function to change K into 1000 and M into 1000000
def value_to_float(x):
    if type(x) == float or type(x) == int:
        return x
    if 'K' in x:
        if len(x) > 1:
            return float(x.replace('K', '')) * 1000
        return 1000.0
    if 'M' in x:
        if len(x) > 1:
            return float(x.replace('M', '')) * 1000000
        return 1000000.0
    if 'B' in x:
        return float(x.replace('B', '')) * 1000000000
    return 0.0

In [11]:
# Apply function value_to_float
data['Likes_Avg'] = data['Likes_Avg'].apply(value_to_float)
data['Followers'] = data['Followers'].apply(value_to_float)
data['Views_Avg'] = data['Views_Avg'].apply(value_to_float)
data['Comments_Avg'] = data['Comments_Avg'].apply(value_to_float)
data['Shares_Avg'] = data['Shares_Avg'].apply(value_to_float)

In [12]:
data

Unnamed: 0,Name,Tiktok name,Followers,Views_Avg,Likes_Avg,Comments_Avg,Shares_Avg
1,yossikramer,Yossi,177300.0,26800000.0,2200000.0,30100.0,71200.0
2,mrbeast,MrBeast,55600000.0,38100000.0,3800000.0,19000.0,7800.0
3,adinross,adin,5400000.0,16600000.0,2000000.0,10900.0,12500.0
4,daniel.labelle,Daniel LaBelle,27900000.0,11800000.0,1700000.0,6900.0,29300.0
5,hotspanishmx,HotSpanish,8200000.0,19200000.0,1700000.0,7400.0,9500.0
...,...,...,...,...,...,...,...
996,nathanmccarron0,Nathanâ??s Art,1600000.0,1100000.0,117600.0,1400.0,1100.0
997,lacha21.1,Lazaro Gonzalez,669400.0,750300.0,118300.0,0.0,3000.0
998,hyoga.x,Hyoga,552000.0,1000000.0,121900.0,1900.0,0.0
999,rrqr7tatsumaki,Rivaldi Fataah,1100000.0,2000000.0,112500.0,1000.0,0.0


### Engagement Rate

ER = ((n_likes + n_shares + n_comments)/followers)*100

In [24]:
data['ER'] = ((data['Likes_Avg'] + data['Comments_Avg'] + data['Shares_Avg'])/data['Followers'])*100
data['ER'] = round(data['ER'], 2)

In [25]:
data

Unnamed: 0,Name,Tiktok name,Followers,Views_Avg,Likes_Avg,Comments_Avg,Shares_Avg,ER
280,jackktierney,Jack Lawrenson-Jones,29600.0,2000000.0,458200.0,1600.0,1600.0,1558.78
1,yossikramer,Yossi,177300.0,26800000.0,2200000.0,30100.0,71200.0,1297.97
21,poudi,Poudii,149700.0,8100000.0,1200000.0,4000.0,9900.0,810.89
179,queenofontario,amaris,129600.0,3400000.0,721700.0,0.0,1900.0,558.33
985,rafaellabd,Rafaella Dahlem,21200.0,455700.0,100300.0,0.0,6500.0,503.77
792,justingrimes_,Justin Grimes,38200.0,1300000.0,181500.0,1100.0,1100.0,480.89
736,samirasphobia,Samira,38500.0,1100000.0,108100.0,4500.0,0.0,292.47
468,_mrs.grey,_mrs.grey,131800.0,1400000.0,357900.0,1500.0,1200.0,273.6
191,jelena.unforgetable,#jelena,183100.0,6200000.0,434500.0,1600.0,0.0,238.18
175,rinklebinkle,rinkle binkle ð??¼,198400.0,1700000.0,389000.0,4300.0,5400.0,200.96


In [26]:
# Sorting 10 best account
data = data.sort_values(by= "ER", ascending=False).head(10)
data

Unnamed: 0,Name,Tiktok name,Followers,Views_Avg,Likes_Avg,Comments_Avg,Shares_Avg,ER
280,jackktierney,Jack Lawrenson-Jones,29600.0,2000000.0,458200.0,1600.0,1600.0,1558.78
1,yossikramer,Yossi,177300.0,26800000.0,2200000.0,30100.0,71200.0,1297.97
21,poudi,Poudii,149700.0,8100000.0,1200000.0,4000.0,9900.0,810.89
179,queenofontario,amaris,129600.0,3400000.0,721700.0,0.0,1900.0,558.33
985,rafaellabd,Rafaella Dahlem,21200.0,455700.0,100300.0,0.0,6500.0,503.77
792,justingrimes_,Justin Grimes,38200.0,1300000.0,181500.0,1100.0,1100.0,480.89
736,samirasphobia,Samira,38500.0,1100000.0,108100.0,4500.0,0.0,292.47
468,_mrs.grey,_mrs.grey,131800.0,1400000.0,357900.0,1500.0,1200.0,273.6
191,jelena.unforgetable,#jelena,183100.0,6200000.0,434500.0,1600.0,0.0,238.18
175,rinklebinkle,rinkle binkle ð??¼,198400.0,1700000.0,389000.0,4300.0,5400.0,200.96
