In [1]:
from getpass import getpass
import pandas as pd
import numpy as np
import seaborn as sns

# Recommender System Model

> `database`:
- import
- preprocess

> `input`: 
- música
- gênero

> `processamento`:
- buscar na base de dados de música atual, encontrar artista, separar
- filtrar DB por target (recomendável)
- filtrar por cluster POR FEATURES
- análise word2vec gêneros
- análise word2vec para a música do input
- calcular dist cosine

- standardizar dados
- incluir no sistema de recomendação
- filtrar os 10 mais semelhantes

> `output`:
- retornar página com top 5 músicas e artistas
- links para ouvir a música

# Pipeline

In [17]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer, KNNImputer
from scipy.spatial import distance
from sklearn.cluster import KMeans

In [365]:
# feat_selection = ['danceability','energy','key','loudness','mode','speechiness','acousticness',
#          'instrumentalness','liveness','valence','tempo','time_signature']

# feat_selection = ['danceability','energy','loudness','speechiness','acousticness',
#                  'instrumentalness','liveness','valence']

feat_selection = ['danceability','energy','speechiness','acousticness',
         'instrumentalness','valence']

In [380]:
pipeline = Pipeline(steps=[('features_selection', AudioFeatures()),
                           ('missing', KNNImputer(n_neighbors=30)),
                           ('preprocessing', StandardScaler()),
                           ('feature_clusterization', KMeans(30))])

In [381]:
songs = pd.read_csv('../data/final/songs.csv')
pipeline.fit(songs)
X = pipeline.named_steps['features_selection'].transform(songs)
pipeline.named_steps['preprocessing'].transform(X)
songs['cluster'] = pipeline.predict(X)
songs.to_csv('../data/final/songs.csv', index=False)

# Input & Validation

## Speech recognition

In [161]:
user_input=check_input(song_input=get_audio('title'), artist_input=get_audio('artist'))

Tell us the title of a song you like :)
Listening... (you've got 4 seconds)
Processing...
Tell us the artist who sings that song
Listening... (you've got 4 seconds)
Processing...
You chose "good news" by Mac Miller, right? (y/n)y


## Written song

In [382]:
user_input=check_input(song_input=get_text('title'), artist_input=get_text('artist'))

Tell us the title of a song you like :)photograph
Tell us the artist who sings that songnickelback
You chose "Photograph" by Nickelback, right? (y/n)y


# Processing

In [383]:
# processing the input and saving to dabase new information
process = target_process(user_input)
target_song = process[0]
target_cluster = process[1]

found in the database


In [384]:
target_song

Unnamed: 0,track,artist,tracks_uri,decade,billboard,art_genres,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,cluster
29490,Photograph,Nickelback,spotify:track:3hb2ScEVkGchcAQqrPLP0R,2000.0,1.0,"['alternative metal', 'canadian rock', 'nu met...",0.513,0.877,8.0,-3.756,1.0,0.0292,0.000932,0.000163,0.136,0.389,145.919,4.0,11


In [385]:
target_cluster

11

In [386]:
# now it's time to filter by the 10 nearest songs and artists
cluster_songs = get_recomm_songs(target_cluster)
recomm_songs = cluster_songs.copy().reset_index(drop=True)

# defining X and y 
X = pipeline.named_steps['features_selection'].transform(recomm_songs)
y = pipeline.named_steps['features_selection'].transform(target_song)

#Standardizing
X_std = pd.DataFrame(pipeline.named_steps['preprocessing'].transform(X), columns=X.columns)
y_std = pd.DataFrame(pipeline.named_steps['preprocessing'].transform(y), columns=y.columns)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cluster_songs['is_recommended'] = cluster_songs.artist.apply(lambda x: 1 if x in list(recomm_artists.artist) else 0)


# Ball Tree

In [None]:
#Calculate distance based on cluster center

In [387]:
from sklearn.neighbors import BallTree

#Calculating the distance between the target song to all the rest of the same cluster
tree = BallTree(X_std, metric= 'euclidean')    
dist, ind = tree.query(y, k=10)    

# Recommendation

In [None]:
# Think of a way to speed up this process: 

In [388]:
# retrieving artist's popularity info 
final_recomm = get_pop_link(recomm_songs)
final_target = get_pop_link(target_song)

In [389]:
# data to be shown
final_feat = ['track', 'artist','art_genres','link','cluster']

# Tuning by some other parameters and showing the final results
recommendation = final_recomm.loc[list(ind[0]),:].sort_values(by=['liveness','popularity'], ascending=False)

# showing final results
recom = pd.concat([final_target[final_feat],recommendation[final_feat]]).reset_index(drop=True)
recom

Unnamed: 0,track,artist,art_genres,link,cluster
0,Photograph,Nickelback,"['alternative metal', 'canadian rock', 'nu met...",https://open.spotify.com/track/3hb2ScEVkGchcAQ...,11
1,Damn I Wish I Was Your Lover,Sophie B. Hawkins,"['lilith', 'new wave pop', 'pop rock']",https://open.spotify.com/track/0RmEn2jcbyrLV4e...,11
2,Aao Milo Chalo,Shaan,"['desi pop', 'filmi', 'modern bollywood', 'sufi']",https://open.spotify.com/track/6ocOeSjnWPcSTaS...,11
3,Little Darlin',Sheila,['background music'],https://open.spotify.com/track/3t7iwbVkBY3vT8z...,11
4,When I Think About Angels,Jamie O'Neal,"['contemporary country', 'country', 'country d...",https://open.spotify.com/track/6X1MOfJSjBT54Vc...,11
5,Slow Dance In A Parking Lot,Jordan Davis,['contemporary country'],https://open.spotify.com/track/6MBUUSIWCzaXW4q...,11
6,Rock This Town,Daniel Johnston,"['alternative rock', 'anti-folk', 'freak folk'...",https://open.spotify.com/track/3PaSRhmdsbY3GTQ...,11
7,Barlights,fun.,"['axe', 'pagode', 'partido alto', 'samba', 'sa...",https://open.spotify.com/track/4HBeHxm71PBKrc6...,11
8,Rani,Faun,"['hurdy-gurdy', 'medieval folk', 'medieval roc...",https://open.spotify.com/track/4f5hVnrSBAPuVfE...,11
9,California Girls,David Lee Roth,"['album rock', 'classic rock', 'glam metal', '...",https://open.spotify.com/track/4H3vuLX59XPqdtT...,11


# Classes & Functions

## Pipeline

In [379]:
class AudioFeatures():
    
    def __init__(self):     
        pass
    
    def fit(self, X, y=None):
        '''
        `.fit()` method needs to return the object itself
        '''
        return self
    
    def transform(self, X):
        '''
        `.transform()` method needs to return a transformed data like X
        '''
        # feat_selection = ['danceability','energy','key','loudness','mode','speechiness','acousticness',
        #          'instrumentalness','liveness','valence','tempo','time_signature']
        
        # feat_selection = ['danceability','energy','loudness','speechiness','acousticness',
        #          'instrumentalness','liveness','valence']
        
        feat_selection = ['danceability','energy','speechiness','acousticness',
         'instrumentalness','valence']
        
        X = X[feat_selection ]
        
        return X
        

In [363]:
songs.columns

Index(['track', 'artist', 'tracks_uri', 'decade', 'billboard', 'art_genres',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'time_signature', 'cluster'],
      dtype='object')

## Clean Artist's names 

In [71]:
def clean_name(df:pd.DataFrame, column_name:str)-> pd.DataFrame:
    '''
    This function cleans the artist's name column of a dataframe and 
    returns the df with the respective column containing only the main name
    '''
    
    import re
    
    df = df.reset_index(drop=True)

    for i in range(df.shape[0]):
        string = df.loc[i,'artist']
        pattern ='(?:Feat(?:\. |uring )|, |\/|\+|&| [Xx] | \(|\)| [Ww]ith | And | y )'
        artists = re.split(pattern, string)
        df.loc[i, 'artist'] = artists[0]
    return df

## Spotify API functions

In [73]:
def access_spotify():
    '''
    this function initializes spotify access 
    '''
    import spotipy
    import spotipy.util as util
    from spotipy.oauth2 import SpotifyClientCredentials
    
    # Credentials
    client_id= '1337a27731b840f98acd0c79f0d011f3'
    client_secret = '98f2df2fdde44f40ad768a8142b20074'
    response_type = 'code'
    redirect_uri = 'https://aceworld.com.br/'
    state= 'https%3A%2F%2Faceworld.com.br%2F'
    scope = 'playlist-read-private scope'
    username = 'dnekdiptrsu4rr1heu145y07j'
    playlist = 'spotify:playlist:2QDyvRV0Ct7bGg3AREkL6P'

    #Accessing with token
    scope = 'user-library-read playlist-read-private'
    try:
        token = util.prompt_for_user_token(username, scope,client_id=client_id, 
                                           client_secret=client_secret, 
                                           redirect_uri=redirect_uri)
        sp=spotipy.Spotify(auth= token)
    except:
        print('Token is not accesible for ' + username)

    return sp

In [181]:
def get_artist_stats(df=False, artist_name: str = 'None', billboard_col: bool = False)-> pd.DataFrame:
    
    '''
    This function receives a dataframe containing a column with artists' names ('artist')
    or a single artist name and the spotify authenticated access to make the requests, 
    and returns a new dataframe, containing the spotify attributes for that artist:
        - artist spotify id (uri)
        - number of followers
        - genres
        - popularity
    
    <parameters>
    df: a DataFrame or 'False' if the request will be for a single artist
    billboard_col: True if the input df already contains a target 
                    column named 'billboard'.
    spotify_access: function that accesses spotify authorization token, 
                    by default: access_spotify()
    '''
    import time
    
    sp = access_spotify()
    artists=[]
    uris = []
    followers = []
    genres =[]
    popularities = []
    error_artists = []
    billboard_targets = []
    
    if df == False and artist_name != 'None':
        artist = artist_name
        q = f'artist:{artist}'
        try:
            results = sp.search(q=q, limit=1, type='artist')['artists']['items'][0]
        except:
            try:
                sp=access_spotify()
                results = sp.search(q=q, limit=1, type='artist')['artists']['items'][0]
            except:
                pass
            
        df_final = pd.DataFrame({'artist': artist,
                     'uri': results['uri'],
                     'followers': results['followers']['total'],
                     'genres': [results['genres']],
                     'popularity': results['popularity'],
                     'billboard': ''})
        
    else: 
        for i in range(df.index[0], df.shape[0]):
            artist = df.loc[i, 'artist']
            q = f'artist:{artist}'

            try:
                results = sp.search(q=q, limit=1, type='artist')['artists']['items'][0]
                print (artist)
            except:
                try:
                    print ('refreshing token')
                    sp=access_spotify()
                    results = sp.search(q=q, limit=1, type='artist')['artists']['items'][0]
                except:
                    print ('error:', artist)
                    pass

            artists.append(artist)
            uris.append(results['uri'])
            followers.append(results['followers']['total'])
            genres.append(results['genres'])
            popularities.append(results['popularity'])
            # if input df contains 'billboard' column
            if billboard_col == True:
                billboard_targets.append(df.loc[i, 'billboard'])
            else:
                pass

            df_2 = pd.DataFrame({'artist': artists,
                                 'uri': uris,
                                 'followers': followers,
                                 'genres': genres,
                                 'popularity': popularities,
                                 'billboard': billboard_targets})
            # saving a version to .csv
            df_2.to_csv('data/processed/artists_stats_process.csv', index=False)

    # saving final version to .csv    
        df_final = df_2
        df_final.to_csv('../data/processed/artists_stats_final.csv', index=False)

    return df_final

In [201]:
def get_audio_features(df=False, track_title:str='None', artist_name:str='None')-> pd.DataFrame:
    '''
    This function receives a dataframe containing tracks' Spotify uris 
    or a single track title and
    and returns a dataframe with the track's audio features.
    
    <parameters>
    df: a DataFrame or 'False' if the request will be made for a single track
    spotify_access: function that accesses spotify authorization token, 
                    by default: access_spotify()
    '''

    feat_df = pd.DataFrame()
    pop=[]
    art_genres=[]
    cluster=[]
    sp = access_spotify()
    
    if df == False :
        q= f'track:{track_title} artist:{artist_name}'    
        results = sp.search(q=q, limit=1, type='track')
        
        try:
            track_title =results['tracks']['items'][0]['name']
        except: 
            print('Couldn\'t find this track in spotify. Did you type everything correctly?')
        tracks_uri = results['tracks']['items'][0]['uri']
        artist_name = results['tracks']['items'][0]['artists'][0]['name']
        artist_uri = results['tracks']['items'][0]['artists'][0]['uri'] 
        
        audio_feat = sp.audio_features(tracks_uri)
        art_genres = sp.artist(artist_uri)['genres']
        cluster= ''
        
        new_df = pd.concat([pd.DataFrame([[
            track_title, artist_name,tracks_uri, artist_uri, art_genres,cluster]], 
            columns=['track','artist','tracks_uri','artist_uri','art_genres', 'cluster']), 
                            pd.DataFrame(audio_feat)], axis=1)
    else:
        saved_uris = list(df.tracks_uri)

        for uri in saved_uris:
            feat_df = feat_df.append(pd.DataFrame(sp.audio_features(uri)))
            track_req=sp.track(uri)
            pop.append(track_req['popularity'])
            art_genres.append(sp.artist(track_req['artists'][0]['uri'])['genres'])
            cluster.append('')

        new_df = pd.concat([df, feat_df.reset_index(drop=True), 
                            pd.DataFrame(pop, columns=['popularity']),
                            pd.DataFrame(art_genres, columns=['art_genres']),
                            pd.DataFrame(cluster, columns=['cluster'])], axis=1)
    
    
    new_df = new_df [['track', 'artist', 'tracks_uri', 'art_genres',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'time_signature', 'cluster']]
    
    return new_df

In [198]:
def get_pop_link (df):
    
    sp = access_spotify()
    index_list = list(df.index)
    popularity = []
    url =[]
    
    df = df.reset_index(drop=True)
    for artist in df.artist:
        try:
            result= sp.search(q=f"artist:{artist}", limit=1, type='artist')
            url.append(result['artists']['items'][0]['external_urls']['spotify'])
            popularity.append(result['artists']['items'][0]['popularity'])
        except: 
            url.append("")
            popularity.append("")
            
    df_req = pd.DataFrame({'popularity': popularity,
                           'link':url})

    df_final = pd.concat([df, df_req], axis=1)
    df_final.index=index_list
    
    return df_final

In [336]:
def get_pop_link (df):
    
    sp = access_spotify()
    index_list = list(df.index)
    popularity = []
    url = 'https://open.spotify.com/'
    
    df = df.reset_index(drop=True)
    for artist in df.artist:
        try:
            result= sp.search(q=f"artist:{artist}", limit=1, type='artist')
            popularity.append(result['artists']['items'][0]['popularity'])
        except: 
            popularity.append("")
            
    df_req = pd.DataFrame({'popularity': popularity})
    df_req['link'] = df.tracks_uri.apply(lambda x: 'track/'.join ([url, x.split(':')[-1]]))

    df_final = pd.concat([df, df_req], axis=1)
    df_final.index=index_list
    
    return df_final

In [None]:
def artist_popularity_link(df):
    # Adding new target based on number of followers:
    
    artists=pd.read_csv('../data/final/artists.csv')
    artist_popularities = []
    url = 'https://open.spotify.com/'

    for artist in list(df_temp.artist):
        try:
            art_index = artists.query(f'artist=="{artist}"').index[0]        
            artist_popularities.append(artists.loc[art_index,'popularity'])
        except:
            artist_popularities.append("")
    df['popularity'] = artist_pop
    df['link'] = df.tracks_uri.apply(lambda x: 'track/'.join ([url, x.split(':')[-1]]))
    
    return df

## Billboard API functions

In [77]:
def get_billboard_charts_data(chart_types: list) -> pd.DataFrame:
    '''
    This function retrieves data from the given list of chart types:
        - song title
        - artist name
        - chart name
    > for more information about chart_types: access 'billboard.charts()'
    '''
    
    df=pd.DataFrame()
    title_list=[]
    artist_list=[]
    chart_name=[]

    for chart_type in chart_types:
        chart = billboard.ChartData(chart_type)
        for song in chart:
            title_list.append(song.title)
            artist_list.append(song.artist)
            chart_name.append(chart.name)

        df = pd.DataFrame({'title':title_list, 
                       'artist':artist_list, 
                       'chart':chart_name})
        df.to_csv('../data/raw/billboard_artists_process.csv', index=False)
    
    df=df.drop_duplicates()
    df.to_csv('../data/raw/billboard_artists.csv', index=False)
    
    return df

## Recommendation System Flow

In [78]:
def artist_recommended(df):
    # Adding new target based on number of followers:
    df['is_recommended'] = df.followers.apply(lambda x: 0 if x > 400000 else 1)
    return df

In [79]:
def get_audio(audio_type:str)-> str:
    '''
    this function receives a speaked audio and transforms it into written text
    you must specify the type between: 'artist' or 'title'
    '''
    import speech_recognition as sr
    import time
    
    r = sr.Recognizer()
    sec = 4
    
    if audio_type == 'title':
        print(f'Tell us the title of a song you like :)')
    elif audio_type == 'artist':
        print(f'Tell us the artist who sings that song')
    else:
        raise ValueError ('audio_type must be "artist" or "title"')
    
    with sr.Microphone() as source:
        # read the audio data from the default microphone
        print(f"Listening... (you've got {sec} seconds)")
        try:
            audio_data = r.record(source, duration=sec)
        except:
            print(f"I didn't hear you, would you repeat please?")
            print(f"Listening... (you've got {sec} seconds)")
        # convert speech to text
        print("Processing...")
        user_input = r.recognize_google(audio_data)
        
        return user_input

In [80]:
def get_text(text_type:str)-> str:
    '''
    this function receives a written text
    you must specify the type between: 'artist' or 'title'
    '''
    if text_type == 'title':
        user_input = str(input(f'Tell us the title of a song you like :)'))
    elif text_type == 'artist':
        user_input = str(input(f'Tell us the artist who sings that song'))
    else:
        raise ValueError ('audio_type must be "artist" or "title"')    

    return user_input

In [81]:
def check_input(song_input: str, artist_input: str)-> list:
    '''
    this function receives an input and confirms if the title was well captured.
    returns a list containing:
        - in position [0]: string (the song title)
        - in position [1]: string (the artist name)
        - in position [2]: bool (0 if not found in database, 1 if found in database)
    '''
    import textdistance
    import time
    n=4

    # assuring if it got the song correctly:     
    try:
        song_lev_dists = songs.track.apply(lambda x:textdistance.levenshtein.normalized_similarity(
                           song_input, x)).sort_values(ascending=False).head(n)
        song_recomm_index = list(song_lev_dists.index)
        song_recomm = songs.loc[song_recomm_index,['track', 'artist']]
        song_recomm['song_dist']=song_lev_dists
        song_recomm['art_dist']= song_recomm.artist.apply(lambda x:textdistance.levenshtein.normalized_similarity(
                           artist_input, x))
        final_recomm = song_recomm.sort_values(by=['song_dist', 'art_dist'], ascending=False)
        final_recomm = final_recomm.query('art_dist > 0.5 and song_dist > 0.5').head(1)
        final_recomm_song = songs.loc[final_recomm.index[0],'track']
        final_recomm_artist = songs.loc[final_recomm.index[0],'artist']    

        # assuring the artist is the same
        if song_lev_dists[final_recomm.index[0]] >= 0.6:
            validate = str(input(f'You chose "{final_recomm_song}" by {final_recomm_artist}, right? (y/n)'))

        elif song_lev_dists[final_recomm.index[0]] < 0.6 and song_lev_dists[final_recomm.index[0]] >= 0.4:
            validate = str(input(f'Did you mean: "{final_recomm_song}" by {final_recomm_artist}? (y/n)'))

        if validate in ['y', 'yes', 'Y', 'Yes', 'YES','sim','Sim','S', 's','SIM']:
            return [final_recomm_song, final_recomm_artist, 1]
        else:
            ## Look for 
            return [song_input, artist_input, 0]
    except:
        validate = str(input(f'You chose "{song_input}" by {artist_input}, right? (y/n)'))
        if validate in ['y', 'yes', 'Y', 'Yes', 'YES','sim','Sim','S', 's','SIM']:
            return [song_input, artist_input, 0]
        else:
            print("We're sorry, we couldn't find this song :(")

In [224]:
def target_process(user_input: list)-> list:
    '''
    This function processes the target song, and clusterizes it given its features
    returns a list containing:
        - target_song
        - target_cluster
    It also saves to the database new songs/artists.
    '''
    
    songs = pd.read_csv('../data/final/songs.csv')
    artists = pd.read_csv('../data/final/artists.csv')

    # look for the song in the songs database
    if user_input[2] == 1:
        target_song = songs.query(f'track == "{user_input[0]}" & artist == "{user_input[1]}"')
        target_index = target_song.index[0]
        target_cluster =  songs.loc[target_index, 'cluster']
        target_artist = songs.loc[target_index, 'artist']
        print('found in the database')
        
    else:
        print('not found in the database, retrieving infos from spotify')

        # look for song in spotify
        target_song = get_audio_features(track_title=user_input[0], artist_name=user_input[1])
        target_index = target_song.index[0]
        pipeline.transform(target_song)
        target_song['cluster'] = pipeline.predict(target_song)
        target_cluster =  target_song.loc[target_index, 'cluster']
        
        # adding new song to songs database
        if target_song.track[0] not in list(songs.track):
            songs = pd.concat([songs, target_song], axis=0).reset_index(drop=True)
            songs.to_csv('../data/final/songs.csv', index=False)

        #ADDING new artist to artist database_
        target_artist = target_song.artist[0]
        new_art = artist_recommended(get_artist_stats(artist_name=target_artist))
        if new_art.artist[0] not in list(artists.artist):
            artists = pd.concat([artists, new_art]).reset_index(drop=True)
            artists.to_csv('../data/final/artists.csv', index=False)        
        print("we're all set")
    
    return [target_song, target_cluster]

In [116]:
def get_recomm_songs (target_cluster: int) -> pd.DataFrame:
    '''
    this function receives the target cluster label 
    and returns the songs dataframe filtered by:
        - cluster 
        - target artists (non famous artists)
    '''
    artists = pd.read_csv('../data/final/artists.csv')
    songs = pd.read_csv('../data/final/songs.csv')
    
    # filter song dataset by target artists
    recomm_artists = artists.query('is_recommended == 1').loc[:,['artist', 'followers', 'popularity']]

    # filter songs dataset by cluster
    cluster_songs = songs.query(f'cluster == {target_cluster}')

    # filter songs by artist (is_recommended or not)
    cluster_songs['is_recommended'] = cluster_songs.artist.apply(lambda x: 1 if x in list(recomm_artists.artist) else 0)
    cluster_songs = cluster_songs.query("is_recommended == 1 & decade >= 1980 & art_genres!='[]'")
    
    return cluster_songs

In [313]:
def artist_popularity_link(df):
    # Adding new target based on number of followers:
    
    artists=pd.read_csv('../data/final/artists.csv')
    artist_popularities = []
    url = 'https://open.spotify.com/'
    
    df_temp = df.copy()
    df_temp['artist'] = df.artist.apply(lambda x: x.replace('"',"'"))

    for artist in list(df_temp.artist):
        art_index = artists.query(f'artist=="{artist}"').index[0]
        
        artist_popularities.append(artists.loc[art_index,'popularity'])
    df['popularity'] = artist_pop
    df['link'] = df.tracks_uri.apply(lambda x: 'track/'.join ([url, x.split(':')[-1]]))
    
    return df

## Other

In [84]:
def remove_outlier(df_in:pd.DataFrame, col_name:str):
    '''
    this function accepts a dataframe, removes outliers, 
    returns a list, containing:
        - [0] new dataframe filtering outliers
        - [1] tuple, containing low, high fences of outliers
    '''
    q1 = df_in[col_name].quantile(0.25)
    q3 = df_in[col_name].quantile(0.75)
    iqr = q3-q1 #Interquartile range
    fence_low  = q1-1.5*iqr
    fence_high = q3+1.5*iqr
    df_out = df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)]
    return [df_out, (fence_low, fence_high)]