<img width="8%" alt="Google Sheets.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Google%20Sheets.png" style="border-radius: 15%">

# Google Sheets - Update BDD Growth

**Tags:** #googlesheets #gsheet #data #naas_drivers #operations #snippet

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook builds a growth database from LinkedIn interactions and sends it to a Google Sheets spreadsheet.

## Input

### Import libraries

In [None]:
from naas_drivers import gsheet
import pandas as pd
import os
from datetime import date
import naas_data_product
try:
    import openai
except:
    !pip install openai --user
    import openai
import time

### Setup variables
**Inputs**
- `input_dir`: Input directory to retrieve file from.
- `input_file`: Name of the file with the LinkedIn interactions to be retrieved.

**Outputs**
- `spreadsheet_url`: Google Sheets spreadsheet URL.
- `sheet_name`: Google Sheets sheet name.
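- `append`: If False, existing data in the sheet is overwritten; if True, new data is appended.
- `output_file`: Name of the file used to save the growth database in `input_dir`.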

In [None]:
# Inputs
# Folder for today's run: <OUTPUTS_PATH>/growth-engine/<YYYY-MM-DD>.
input_dir = os.path.join(naas_data_product.OUTPUTS_PATH, "growth-engine", date.today().isoformat())
# Name of the stored interactions dataset, passed to pload() together with input_dir.
input_file = "linkedin_interactions"

# Outputs
# NOTE(review): `naas` is not imported in the visible import cell — confirm it is in scope before this cell runs.
# Falls back to a placeholder URL when the "ABI_SPREADSHEET" secret is unset or empty.
spreadsheet_url = naas.secret.get("ABI_SPREADSHEET") or "YOUR_GOOGLE_SPREADSHEET_URL"
sheet_name = "GROWTH"
# Forwarded as-is to gsheet.send(); False is documented above as replacing the sheet content.
append = False
# Name under which the growth dataframe is saved with pdump() (in input_dir).
output_file = "growth"

## Model

### Get DB interactions

In [None]:
# Load the interactions dataset saved for today's run.
# NOTE(review): `pload` is not defined or imported in the visible cells — presumably
# provided by the naas_data_product environment; confirm before a fresh-kernel run.
df_interactions = pload(input_dir, input_file)    
print('🗂️ Interactions:', len(df_interactions))
# Preview a single row to check the schema without dumping the whole frame.
df_interactions.head(1)

### Get interactions by profile and scenario

In [None]:
def get_interactions_by_profile_and_scenario(
    df_init,
):
    """Summarise, per (SCENARIO, PROFILE_URL), every interaction as one text.

    Each POST_REACTION row becomes "<Reaction> '<title>' (<url>)" and each
    POST_COMMENT row becomes "Comment '<Comment>' on '<title>' (<url>)".
    The sentences of a given scenario/profile pair are joined with ", "
    (no trailing separator).

    Parameters
    ----------
    df_init : pandas.DataFrame
        Must contain the columns SCENARIO, PROFILE_URL, CONTENT_TITLE,
        CONTENT_URL, INTERACTION and INTERACTION_CONTENT. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per distinct (SCENARIO, PROFILE_URL), in order of first
        appearance in ``df_init`` and keeping the original index labels, with
        columns SCENARIO, PROFILE_URL and INTERACTIONS. INTERACTIONS is ""
        when the pair has no reaction/comment row.
    """
    keys = ["SCENARIO", "PROFILE_URL"]
    to_select = keys + [
        "CONTENT_TITLE",
        "CONTENT_URL",
        "INTERACTION",
        "INTERACTION_CONTENT",
    ]

    # Stable sort so interactions of one profile keep their input order.
    df = df_init[to_select].sort_values(by="PROFILE_URL", kind="stable").reset_index(drop=True)

    # Missing pieces become empty strings instead of rendering as "nan".
    content = df["INTERACTION_CONTENT"].fillna("").str.capitalize()
    post = df["CONTENT_TITLE"].fillna("").str.strip() + "' (" + df["CONTENT_URL"].fillna("") + ")"

    is_reaction = df["INTERACTION"] == "POST_REACTION"
    is_comment = df["INTERACTION"] == "POST_COMMENT"
    df["INTERACTION_TEXT"] = ""
    df.loc[is_reaction, "INTERACTION_TEXT"] = (content + " '" + post)[is_reaction]
    df.loc[is_comment, "INTERACTION_TEXT"] = ("Comment '" + content + "' on '" + post)[is_comment]

    # Rows of any other interaction type carry no text and must not add
    # empty fragments to the joined sentence.
    df_texts = df[df["INTERACTION_TEXT"] != ""]
    if df_texts.empty:
        texts_by_key = {}
    else:
        # Single pass over the data instead of re-filtering the frame per key.
        texts_by_key = (
            df_texts.groupby(keys, sort=False)["INTERACTION_TEXT"]
            .agg(", ".join)
            .to_dict()
        )

    # Create interactions by profile
    df_keys = df_init[keys].drop_duplicates()
    df_keys["INTERACTIONS"] = [
        texts_by_key.get(key, "")
        for key in zip(df_keys["SCENARIO"], df_keys["PROFILE_URL"])
    ]
    return df_keys

# Build the per-scenario/profile interaction summary; create_growth_db() later merges this frame.
df_interaction_text = get_interactions_by_profile_and_scenario(df_interactions)
# Preview a single row.
df_interaction_text.head(1)

### Get last interaction date

In [None]:
# Keep one PUBLISHED_DATE per (SCENARIO, PROFILE_URL): exact duplicate rows are dropped first,
# then the FIRST remaining row of each pair is kept (drop_duplicates default keep="first").
# NOTE(review): no sort by PUBLISHED_DATE happens here, so this is the *last* interaction date
# only if df_interactions is already ordered most-recent-first — confirm against the upstream step.
df_last_interaction = df_interactions[["SCENARIO", "PROFILE_URL", "PUBLISHED_DATE"]].drop_duplicates().drop_duplicates(["SCENARIO", "PROFILE_URL"])
# Preview a single row.
df_last_interaction.head(1)

### Create Growth dataframe

In [None]:
def create_growth_db(df_init, df_text=None, df_last=None):
    """Aggregate interactions into one scored row per profile and scenario.

    Parameters
    ----------
    df_init : pandas.DataFrame
        Interactions with at least the columns ENTITY, SCENARIO, PLATFORM,
        FULLNAME, OCCUPATION, PROFILE_URL and INTERACTION_SCORE. Not modified.
    df_text : pandas.DataFrame, optional
        Frame with SCENARIO, PROFILE_URL and the interaction text to attach.
        Defaults to the notebook-level ``df_interaction_text``.
    df_last : pandas.DataFrame, optional
        Frame with SCENARIO, PROFILE_URL and PUBLISHED_DATE (string).
        Defaults to the notebook-level ``df_last_interaction``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the summed INTERACTION_SCORE, a COHORT column,
        the merged text columns and LAST_INTERACTION_DATE formatted as
        "%a. %d %b.". Rows are sorted by SCENARIO, INTERACTION_SCORE and the
        actual interaction date, all descending, with a fresh 0..n-1 index.
    """
    keys = ["SCENARIO", "PROFILE_URL"]

    # Fall back to the frames built by the previous notebook cells so that
    # existing calls with a single argument keep working.
    if df_text is None:
        df_text = df_interaction_text
    if df_last is None:
        df_last = df_last_interaction

    # Init
    df = df_init.copy()

    # Get cohort
    df_cohort = df_init[keys].drop_duplicates(keep="last")
    if not df_cohort.empty:
        # Scenario of the first input row. Read positionally from df_init:
        # label 0 may no longer exist in df_cohort once keep="last" has
        # dropped the first row as a duplicate, which made .loc[0] raise.
        current_scenario = df_init["SCENARIO"].iloc[0]
        df_cohort.loc[df_cohort["SCENARIO"] == current_scenario, "SCENARIO"] = "NEW"
    # For a profile seen in several scenarios the last listed one wins.
    cohorts = df_cohort.set_index("PROFILE_URL")["SCENARIO"].to_dict()

    # Add cohort to df
    df["COHORT"] = df["PROFILE_URL"].map(cohorts)

    # Groupby
    # NOTE(review): groupby drops rows whose key columns contain NaN
    # (e.g. a missing OCCUPATION) — confirm this is intended.
    to_group = [
        "ENTITY",
        "SCENARIO",
        "PLATFORM",
        "FULLNAME",
        "COHORT",
        "OCCUPATION",
        "PROFILE_URL",
    ]
    to_agg = {
        "INTERACTION_SCORE": "sum"
    }
    df = df.groupby(to_group, as_index=False).agg(to_agg)

    # Merge data (explicit keys instead of "all shared columns")
    df = pd.merge(df, df_text, how="left", on=keys)
    df = pd.merge(df, df_last, how="left", on=keys)

    to_rename = {
        "PUBLISHED_DATE": "LAST_INTERACTION_DATE"
    }
    df = df.rename(columns=to_rename)

    # The last five characters are dropped before parsing
    # (presumably a UTC offset such as "+0000" — TODO confirm).
    last_dates = pd.to_datetime(df["LAST_INTERACTION_DATE"].str[:-5])
    df["LAST_INTERACTION_DATE"] = last_dates.dt.strftime("%a. %d %b.")

    # Sort on the real timestamps: ordering the formatted strings would rank
    # rows alphabetically by weekday name rather than chronologically.
    df = (
        df.assign(_LAST_INTERACTION_TS=last_dates)
        .sort_values(
            by=["SCENARIO", "INTERACTION_SCORE", "_LAST_INTERACTION_TS"],
            ascending=[False, False, False],
        )
        .drop(columns="_LAST_INTERACTION_TS")
    )
    return df.reset_index(drop=True)

# Build the growth dataframe; create_growth_db() also reads the notebook-level
# df_interaction_text and df_last_interaction, so the cells above must have run first.
df_growth = create_growth_db(df_interactions)
print("🚀 Growth DB:", len(df_growth))
# Preview the first rows.
df_growth.head(5)

## Output

### Save data

In [None]:
# Save the growth dataframe under `output_file` in the same dated folder the input was read from.
# NOTE(review): `pdump` is not defined or imported in the visible cells — presumably provided by
# the naas_data_product environment; confirm.
pdump(input_dir, df_growth, output_file)

### Send data to Google Sheets spreadsheet

In [None]:
# Push the growth dataframe to the configured sheet; `append` (False in the setup cell) is
# forwarded unchanged — per the setup notes, False replaces the existing sheet content.
gsheet.connect(spreadsheet_url).send(sheet_name=sheet_name, data=df_growth, append=append)