# Analíticas de Instagram - Competencia

#### Meses: Marzo, Abril y Mayo 

#### Se necesita:
- Número de likes por post 
- Número de comentarios por post 
- Número de seguidores 

#### Se desea:
- Número de posts publicados en cada mes
- Interacción promedio por post ((Likes + comentarios) / Número de posts publicados durante los 3 meses)

## Importamos las librerías necesarias

In [7]:
import random
from time import sleep
import datetime
import json
from igramscraper.instagram import Instagram
from functools import reduce
import pandas as pd

## Inicializamos la instancia de la librería de scraping

In [8]:
# Module-level scraper client; get_info_from_api reads it as a global.
# No login call appears in this notebook, so requests are presumably
# anonymous — TODO confirm.
instagram = Instagram()

## Función para filtrar posts por año y rango de meses (p. ej. marzo a mayo del 2020)

In [9]:
def media_filter(media, year, start_month, end_month):
    """Tell whether a post was created in ``year`` within a month range.

    Args:
        media: Object exposing ``created_time``, a timestamp accepted by
            ``datetime.datetime.fromtimestamp`` (presumably Unix seconds).
        year: Calendar year the post must belong to.
        start_month: First accepted month number (inclusive).
        end_month: Last accepted month number (inclusive).

    Returns:
        True when the creation date falls in ``year`` and its month lies
        between ``start_month`` and ``end_month``; False otherwise.
    """
    # fromtimestamp() without a tz argument yields local time, so month
    # boundaries follow the timezone of the machine running the notebook.
    posted_at = datetime.datetime.fromtimestamp(media.created_time)
    if posted_at.year != year:
        return False
    return start_month <= posted_at.month <= end_month

## Definimos la función para pedir al API, reservando un tiempo adecuado entre requests

In [18]:
def get_info_from_api(username, media_count, counter,
                      year=2020, start_month=10, end_month=10):
    """Fetch an account's profile and its posts from the requested period.

    Exceptions raised by the scraper calls are not caught here and
    propagate to the caller.

    Args:
        username: Instagram username to look up.
        media_count: Number of posts to request from ``instagram.get_medias``.
        counter: Position of this account in the caller's loop. Unused here;
            kept so existing calls keep working.
        year: Year the posts must belong to. Defaults to 2020.
        start_month: First month kept (inclusive). Defaults to 10.
        end_month: Last month kept (inclusive). Defaults to 10.

    Returns:
        dict with ``'account'`` (the result of ``instagram.get_account``) and
        ``'medias'`` (list of the fetched posts accepted by ``media_filter``).
    """
    account = instagram.get_account(username)
    # Random 15-30 s pause so the two scraper calls are spaced out.
    sleep(random.uniform(15, 30))
    fetched = instagram.get_medias(username, media_count)
    # Keep only the posts created inside the requested period.
    in_period = [
        media for media in fetched
        if media_filter(media, year, start_month, end_month)
    ]
    return {'account': account, 'medias': in_period}

## Definimos los username de las cuentas que se revisarán

In [23]:
# Usernames of the accounts to scrape.
# NOTE(review): the recorded run of the scraping loop ends with a 404
# (InstagramNotFoundException) while checking 'platanazovenezuela' —
# verify that handle.
account_names = [ 'natuchipsve', 'platanazovenezuela',]

## Obtenemos la información del API, esperando entre requests

In [24]:
# Number of accounts fully processed so far; it drives the pauses between
# requests and the progress message.
counter = 0
accounts_info = []
for name in account_names:
    print(f"Starting to check: {name}. We're at {counter+1}/{len(account_names)}")
    # Short random pause before every account except the first one.
    if counter != 0:
        print('Waiting...')
        sleep(random.uniform(15,30))
    response = get_info_from_api(name, 160, counter)
    medias = response['medias']
    media_count = len(medias)
    # Aggregate the per-post numbers for this account.
    account_info = {
        "username": name,
        "likes_sum": sum(item.likes_count for item in medias),
        "comments_sum": sum(item.comments_count for item in medias),
        'follower_count': response['account'].followed_by_count,
        'october_posts_count': sum(
            1 for item in medias if media_filter(item, 2020, 10, 10)
        ),
    }
    if account_info['october_posts_count'] == 0:
        print(f"Might be missing some posts here at account: {name} ")
    # The means are only defined when at least one post was kept; the keys
    # are left out otherwise.
    if media_count != 0:
        account_info['mean_likes'] = account_info["likes_sum"] / media_count
        account_info['mean_comments'] = account_info["comments_sum"] / media_count
        account_info['mean_interaction'] = (
            account_info['likes_sum'] + account_info['comments_sum']
        ) / media_count
    accounts_info.append(account_info)
    print(f"Finished checking: {name}")
    counter += 1
    # Each account costs two scraper calls (profile + posts), so five accounts
    # are ten requests: take a 5-10 minute break after every fifth account,
    # except when there is nothing left to fetch.
    if counter % 5 == 0 and counter < len(account_names):
        print('Waiting...')
        sleep(random.uniform(300,600))

Starting to check: natuchipsve. We're at 1/2
Finished checking: natuchipsve
Starting to check: platanazovenezuela. We're at 2/2
Waiting...


InstagramNotFoundException: Account with given username does not exist., Code:404

## Metemos la información en un DataFrame de Pandas

In [None]:
# Columns of the exported table, in output order.
keys = ["username", "likes_sum", "comments_sum", 'follower_count', 'october_posts_count', 'mean_likes', 'mean_comments', 'mean_interaction']
# One Series per column, holding that field for every scraped account.
# A field missing from an account's dict is filled with 0.
df = pd.DataFrame({
    key: pd.Series([info.get(key, 0) for info in accounts_info])
    for key in keys
})

## Exportamos a un archivo de Excel

In [None]:
# Write the table to an Excel workbook (.xlsx); the relative path resolves
# against the current working directory. to_excel needs an Excel writer
# engine (e.g. openpyxl) to be installed.
df.to_excel('competencia_instagram_iselitas_oct_2020.xlsx')