# Tito Analytics
---

## Initialize script
Always run this cell first.

In [None]:
%%capture
from IPython.core.magic import register_line_magic

@register_line_magic
def pip(args):
    """Run pip for the interpreter that backs the current kernel.

    ``args`` is the text following ``%pip`` (for example ``"install pandas"``);
    it is split on whitespace and passed to ``python -m pip``.

    ``pip.main`` was removed from pip's importable API in pip 10, so pip is
    invoked as a subprocess of ``sys.executable`` instead. A failing pip run
    does not raise here, matching the previous best-effort behaviour.
    """
    import subprocess
    import sys

    subprocess.run([sys.executable, "-m", "pip", *args.split()], check=False)
    
%pip install pandas
%pip install geopy
%pip install openpyxl

from geopy.geocoders import Nominatim
import pandas as pd
# Print DataFrames in full: no row or column truncation, and very wide lines.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
import datetime
# Module-level Nominatim client; analytics.get_zip uses it for reverse geocoding.
geolocator = Nominatim(user_agent="TitoApp")

class analytics:
    """Usage report built from the Tito production data workbook.

    Constructing an instance reads four sheets from the workbook, left-joins
    the mobile users with their settings, keeps the users whose
    ``mobile_version`` matches ``CURRENT_VERSION_PATTERN`` and computes the
    counters stored in ``fields``. The remaining public methods print reports.
    """

    # Workbook file name, plus the names passed as ``sheet_name`` when reading it.
    # NOTE(review): three of the sheet names end in ".csv" - confirm that the
    # workbook's sheets are really named this way.
    FILES = {
        "EXCEL_FILE": "TitoDataProd.xlsx",
        "USERS_MOBILE": "Users Mobile",
        "USERS_SETTINGS": "mobile_settings.csv",
        "POIS": "pois.csv",
        "POI_PICTURES": "poi_pictures.csv",
    }

    # Template of the counters; every instance gets its own copy in __init__.
    fields = {
        "total_current_users": None,
        "total_MAU_users": None,
        "pins_generated": None,
        "objects_uploaded": None,
        "pins_from_broadcasts": None,
        "object_threes": None,
        "notifications_on_total": None,
        "notifications_on_MAU": None,
        "location_never": None,
        "precise_location_off": None,
        "gallery_permissions_off": None,
        "camera_permissions_off": None,
        "one_time_users": None,
        "users_nyc": None,
        "users_nyc_mau": None,
    }

    # A "current" mobile version is "<major>=1>.<minor>.<patch> (<build>)" or
    # exactly "0.1.0 (65)". Raw string, with the literal dots escaped.
    CURRENT_VERSION_PATTERN = r"^[1-9][0-9]*\.\d+\.\d+ \(\d+\)|0\.1\.0 \(65\)"

    # Settings columns that are all empty for users counted as "one-time users".
    _PERMISSION_COLUMNS = [
        'notification_permissions',
        'location_permissions',
        'gallery_permissions',
        'camera_permissions',
        'microphone_permissions',
    ]

    # Cut-off timestamps: end of the report day minus 28 / 7 days.
    MAU_date = None
    WAU_date = None

    df = None                 # users joined with their settings
    users_mobile_df = None
    users_settings_df = None
    pois_df = None
    pictures_df = None
    df_current = None         # rows of ``df`` on a current mobile version
    current_df = None         # older attribute name; kept pointing at df_current

    def __init__(self, date):
        """Load the workbook and compute all counters.

        ``date`` is the report date as a ``DD/MM/YY`` string.
        """
        # Copy the template so counters are not shared between instances
        # through the class-level dict.
        self.fields = dict.fromkeys(self.fields)
        self.init_dates(date)
        self.open_files()
        self.merge_dfs()
        self.format_all_dates()
        self.make_current_users_df()
        self.fill_fields()

    def init_dates(self, date):
        """Set ``MAU_date`` / ``WAU_date`` to 28 / 7 days before 23:59:59 of ``date``."""
        end_of_day = datetime.datetime.strptime(f'{date} 23:59:59', '%d/%m/%y %H:%M:%S')
        self.MAU_date = end_of_day - datetime.timedelta(28)
        self.WAU_date = end_of_day - datetime.timedelta(7)

    def open_sheet(self, sheet_name):
        """Return the named sheet of the workbook as a DataFrame."""
        return pd.read_excel(self.FILES["EXCEL_FILE"], engine='openpyxl', sheet_name=sheet_name)

    def open_files(self):
        """Read the users, settings, POI and POI-picture sheets."""
        self.users_mobile_df = self.open_sheet(self.FILES["USERS_MOBILE"])
        self.users_settings_df = self.open_sheet(self.FILES["USERS_SETTINGS"])
        self.pois_df = self.open_sheet(self.FILES["POIS"])
        self.pictures_df = self.open_sheet(self.FILES["POI_PICTURES"])

    def merge_dfs(self):
        """Left-join users with their settings on ``id`` == ``user_id``."""
        # Drop the settings' own id/timestamp columns so they do not collide
        # with the users' columns of the same name.
        settings = self.users_settings_df.drop(['id', 'created_at', 'updated_at'], axis=1)
        merged = pd.merge(self.users_mobile_df, settings,
                          left_on='id', right_on='user_id', how='left')
        self.df = merged.drop('user_id', axis=1)

    def format_date(self, data_frame, column):
        """Convert ``column`` to datetimes in place; unparseable values become NaT."""
        data_frame[column] = pd.to_datetime(data_frame[column], errors='coerce')

    def format_all_dates(self):
        """Parse every timestamp column the counters compare against."""
        self.format_date(self.df, 'last_activity_timestamp')
        self.format_date(self.pois_df, 'created_at')
        self.format_date(self.pictures_df, 'created_at')

    def make_current_users_df(self):
        """Build ``df_current``: the users on a current mobile version."""
        # na=False treats a missing version as "not current".
        is_current = self.df['mobile_version'].str.match(self.CURRENT_VERSION_PATTERN, na=False)
        # Copy so that columns added later (postcode, user_type) are written to
        # an independent frame rather than to a slice of ``df``.
        self.df_current = self.df[is_current].copy()
        self.current_df = self.df_current

    @staticmethod
    def _count(mask):
        """Return the number of True entries of a boolean Series as an int."""
        return int(mask.sum())

    def fill_fields(self):
        """Compute the counters in ``fields`` from the loaded frames.

        Comparisons against missing values (NaT / NaN / None) are False, so
        such rows are never counted.
        """
        users = self.df_current
        count = self._count

        recent_pois = self.pois_df['created_at'] > self.WAU_date
        recent_pictures = self.pictures_df['created_at'] > self.WAU_date
        notifications_on = users['notification_permissions'] == 'allowed'
        monthly_active = users['last_activity_timestamp'] > self.MAU_date

        self.fields.update({
            'total_current_users': users.shape[0],
            'pins_generated': count(recent_pois),
            'pins_from_broadcasts': count(recent_pois & (self.pois_df['accredited_user_id'] > 0)),
            'objects_uploaded': count(recent_pictures),
            'object_threes': count(recent_pictures & (self.pictures_df['great_count'] > 0)),
            'notifications_on_total': count(notifications_on),
            'total_MAU_users': count(monthly_active),
            'notifications_on_MAU': count(notifications_on & monthly_active),
            'location_never': count(users['location_permissions'] == 'never'),
            'precise_location_off': count(users['precise_location'] == 'OFF'),
            'gallery_permissions_off': count(
                users['gallery_permissions'].isin(['never', 'not-allowed', 'Limited'])),
            'camera_permissions_off': count(
                users['camera_permissions'].isin(['never', 'not-allowed'])),
            'one_time_users': count(users[self._PERMISSION_COLUMNS].isnull().all(axis=1)),
        })

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole`` (one decimal); 0.0 if ``whole`` is 0."""
        if not whole:
            return 0.0
        return round(part / whole * 100, 1)

    def print_analytics(self):
        """Print every counter; percentages are relative to current (or MAU) users."""
        stats = self.fields
        pct = self._percent
        total = stats['total_current_users']
        total_mau = stats['total_MAU_users']

        print(f"# Total Users: {total}")
        print(f"# Total MAU Users: {total_mau}")
        print(f"# PINs Generated (All): {stats['pins_generated']}")
        print(f"# Objects Uploaded: {stats['objects_uploaded']}")
        print(f"# PINs from Broadcasts: {stats['pins_from_broadcasts']}")
        print(f"# Objects 3s (make the feed): {stats['object_threes']}")
        print(f"# Notifications On: {stats['notifications_on_total']}")
        print(f"% Notifications On: {pct(stats['notifications_on_total'], total)}%")
        print(f"# Notifications On (MAU): {stats['notifications_on_MAU']}")
        print(f"% Notifications On (MAU): {pct(stats['notifications_on_MAU'], total_mau)}%")
        print(f"# Location: Never: {stats['location_never']}")
        print(f"% Location: Never: {pct(stats['location_never'], total)}%")
        print(f"# Precise Location Off: {stats['precise_location_off']}")
        print(f"% Precise Location Off: {pct(stats['precise_location_off'], total)}%")
        print(f"# Photos: Limited, Never or Not Allowed: {stats['gallery_permissions_off']}")
        print(f"% Photos: Limited, Never or Not Allowed: {pct(stats['gallery_permissions_off'], total)}%")
        print(f"# Camera: Never or Not Allowed: {stats['camera_permissions_off']}")
        print(f"% Camera: Never or Not Allowed: {pct(stats['camera_permissions_off'], total)}%")
        print(f"# Mobile Settings Unavailable (one-time users): {stats['one_time_users']}")
        print(f"% Mobile Settings Unavailable (one-time users): {pct(stats['one_time_users'], total)}%")

    def get_location_data(self):
        """Add a ``postcode`` column to ``df_current`` and print the NYC user counts.

        Each user is resolved with ``get_zip``; a user counts as an NYC user
        when a postcode was found. The two counts are also stored in
        ``fields['users_nyc']`` and ``fields['users_nyc_mau']``.
        """
        users = self.df_current
        # A plain list keeps this working for an empty frame, where a row-wise
        # ``apply`` does not return a Series.
        users['postcode'] = [self.get_zip(row) for _, row in users.iterrows()]

        in_nyc = users['postcode'].notna()
        users_nyc = int(in_nyc.sum())
        users_nyc_mau = int((in_nyc & (users['last_activity_timestamp'] > self.MAU_date)).sum())

        self.fields['users_nyc'] = users_nyc
        self.fields['users_nyc_mau'] = users_nyc_mau

        print(f"# Total NYC users: {users_nyc}")
        print(f"# MAU NYC users: {users_nyc_mau}")

    def print_zip(self):
        """Print the number of users per postcode (needs ``get_location_data`` first)."""
        print("Users per zipcode:")
        with pd.option_context('display.max_rows', None, 'display.max_columns', None):
            print(self.df_current['postcode'].value_counts())

    def get_zip(self, x):
        """Return the postcode of user row ``x`` if it geocodes to New York, else None.

        Uses ``last_lat`` / ``last_lng`` when both are present and non-zero,
        otherwise ``lat`` / ``lng``. Any geocoding failure, or an address
        without a ``city`` / ``postcode`` entry, yields None.
        """
        has_last_position = (x['last_lat'] != 0 and not pd.isnull(x['last_lat'])
                             and x['last_lng'] != 0 and not pd.isnull(x['last_lng']))
        if has_last_position:
            lat, lng = x['last_lat'], x['last_lng']
        else:
            lat, lng = x['lat'], x['lng']

        # Nothing to look up: skip the geocoder request altogether.
        if pd.isnull(lat) or pd.isnull(lng):
            return None

        try:
            loc = geolocator.reverse(f"{lat}, {lng}")
            if loc.raw['address']['city'] == 'New York':
                return loc.raw['address']['postcode']
        except Exception:
            # Best effort, but unlike a bare ``except`` this lets
            # KeyboardInterrupt through so a long run can be stopped.
            return None
        return None

    @staticmethod
    def _classify_user(idle, notification_setting, location_setting):
        """Return ``"<activity>.<cohort>"`` for one user.

        activity: MAU (< 29 idle days), QAU (29-90) or Dead (> 90).
        cohort: C1 notifications+location on, C2 notifications only,
        C3 location only, C4 neither. Location counts as on unless the
        setting is exactly ``"never"``.
        """
        days_idle = round(idle / datetime.timedelta(days=1))
        if days_idle < 29:
            activity = "MAU"
        elif days_idle <= 90:
            activity = "QAU"
        else:
            activity = "Dead"

        notifications_on = notification_setting == "allowed"
        location_on = location_setting != "never"
        cohorts = {
            (True, True): "C1",
            (True, False): "C2",
            (False, True): "C3",
            (False, False): "C4",
        }
        return f"{activity}.{cohorts[(notifications_on, location_on)]}"

    def get_rem_data(self):
        """Add a ``user_type`` column to ``df_current`` and print it per user.

        Users without a last-activity timestamp get no ``user_type`` (NaN).
        NOTE(review): idle days are measured from the current time, not from
        the report date passed to the constructor - confirm this is intended.
        """
        LAST_ACTIVITY = "last_activity_timestamp"
        NOTIFICATIONS = "notification_permissions"
        LOCATION = "location_permissions"
        today = datetime.datetime.today()

        active = self.df_current.dropna(subset=[LAST_ACTIVITY])
        user_types = {
            index: self._classify_user(today - last_seen, notifications, location)
            for index, last_seen, notifications, location in zip(
                active.index, active[LAST_ACTIVITY], active[NOTIFICATIONS], active[LOCATION])
        }
        # Assigning a Series aligns on the index, so the column always exists
        # and unclassified users are left as NaN.
        self.df_current['user_type'] = pd.Series(user_types, dtype=object)
        print(self.df_current[['id', 'username', 'email', 'user_type']].set_index('id').sort_values(by=['user_type']))


## Initialize Analytics with date of sheet
Always run this cell. Pass the date of the sheet as a `DD/MM/YY` string.

In [None]:
# Build the report for the sheet dated 17 January 2021 (format DD/MM/YY).
a = analytics("17/01/21")

---

## Get Analytics

In [None]:
# Print the counters computed when `a` was constructed.
a.print_analytics()

---

## Get Location Data
Takes some time to retrieve the data.

In [None]:
# Reverse-geocode the current users (up to one geocoder request each), then print the NYC counts.
a.get_location_data()

##### Get Zip Data
Run this only after running the cell above (`get_location_data()`).

In [None]:
# Print user counts per postcode; reads the 'postcode' column that get_location_data() adds.
a.print_zip()

---

## Get REM data

In [None]:
# Print id, username and email with the computed user_type, sorted by user_type.
a.get_rem_data()