<img width="8%" alt="Google Sheets.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Google%20Sheets.png" style="border-radius: 15%">

# Google Sheets - Send content database to spreadsheet

**Tags:** #googlesheets #gsheet #data #naas_drivers #operations #snippet

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook retrieves your published LinkedIn posts from a local file, enriches them with topics generated with OpenAI, and sends the consolidated content database to a Google Sheets spreadsheet.

## Input

### Import libraries

In [None]:
from naas_drivers import gsheet
import pandas as pd
import os
from datetime import date
import naas_data_product

### Setup variables
**Inputs**
- `input_dir`: Input directory to retrieve file from.
- `file_name`: Name of the file to be retrieved.
- `openai_api_key`: OpenAI API Key.

**Outputs**
- `spreadsheet_url`: Google Sheets spreadsheet URL.
- `sheet_name`: Google Sheets sheet name.
- `output_dir`: Output directory where the file is saved.
- `file_content`: Name of the file to be saved locally.

In [None]:
# Inputs
# Posts are read from today's "content-engine" folder under the data product outputs path.
input_dir = os.path.join(
    naas_data_product.OUTPUTS_PATH,
    "content-engine",
    date.today().isoformat(),
)
file_name = "linkedin_posts"
# Falls back to the placeholder string when the secret is not set (or is empty).
openai_api_key = naas.secret.get("OPENAI_API_KEY") or "YOUR_OPENAI_API_KEY"

# Outputs
# Falls back to the placeholder string when the secret is not set (or is empty).
spreadsheet_url = naas.secret.get("ABI_SPREADSHEET") or "YOUR_GOOGLE_SPREADSHEET_URL"
sheet_name = "CONTENT"
# The consolidated content file is written to the same dated folder layout as the input.
output_dir = os.path.join(
    naas_data_product.OUTPUTS_PATH,
    "content-engine",
    date.today().isoformat(),
)
file_content = "content"

## Model

### Get data from Google Sheets spreadsheet

In [None]:
# Read the current content of the target sheet; it is used below to reuse
# already-computed topics and to keep rows that are no longer in the posts file.
sheet_client = gsheet.connect(spreadsheet_url)
df_gsheet = sheet_client.get(sheet_name=sheet_name)
print("Rows:", len(df_gsheet))
df_gsheet.head(1)

### Get posts from local

In [None]:
# Load the posts stored under `input_dir` with the name `file_name`.
# NOTE(review): `pload` is not defined in this notebook; presumably it is made
# available by the `naas_data_product` import -- confirm.
df_posts = pload(input_dir, file_name)
print("Rows:", len(df_posts))
df_posts.head(1)

### Cleaning data

In [None]:
# Get topics
# Topics already stored in the spreadsheet, keyed by content URL, so that
# rows which already have a topic are not sent to the chat completion again.
topics = {}
if "TOPICS" in df_gsheet.columns:
    topics = dict(zip(df_gsheet["CONTENT_URL"], df_gsheet["TOPICS"]))

df = df_posts.copy()

# Cleaning if title is None and Content = 'Video (native)' -> "Live"
is_untitled_video = (df["TITLE"].astype(str) == 'None') & (df["CONTENT"] == 'Video (native)')
df.loc[is_untitled_video, "TITLE"] = "Live"
# Every post titled "Live" (including the ones just relabelled) gets "Live" as text.
df.loc[df["TITLE"].astype(str) == 'Live', "TEXT"] = "Live"

# Select
to_select = [
    "AUTHOR_NAME",
    "PUBLISHED_DATE",
    "TITLE",
    "TEXT",
    "CHARACTER_COUNT",
    "TAGS",
    "VIEWS",
    "LIKES",
    "COMMENTS",
    "SHARES",
    "ENGAGEMENT_SCORE",
    "POST_URL"
]

# Note: "TEXT" becomes "CONTENT"; the original "CONTENT" column of the posts
# file (used above to detect native videos) is not part of the selection.
to_rename = {
    "POST_URL": "CONTENT_URL",
    "AUTHOR_NAME": "ENTITY",
    "TEXT": "CONTENT",
    "CHARACTER_COUNT": "CONTENT_LENGTH",
    "TAGS": "KEYWORDS",
}
df = df[to_select]
df = df.rename(columns=to_rename)

# Parse the publication timestamp once and reuse it for the three derived columns.
# utc=True makes the parse robust to rows carrying different UTC offsets (e.g. on
# either side of a daylight-saving change), which would otherwise not produce a
# datetime column usable with `.dt`. The values are converted to TIMEZONE right
# after, so the resulting local times are the same as without utc=True.
published_local = pd.to_datetime(
    df['PUBLISHED_DATE'],
    format='%Y-%m-%d %H:%M:%S%z',
    utc=True,
).dt.tz_convert(TIMEZONE)

df.insert(loc=1, column="SCENARIO", value=published_local.dt.strftime("W%W-%Y"))
df.insert(loc=2, column="SOURCE", value="LinkedIn")
df.insert(loc=4, column="DATE", value=published_local.dt.strftime("%a. %d %b."))
df.insert(loc=5, column="TIME", value=published_local.dt.strftime('%HH%M'))
# Placeholder that fixes the position of the column; real values are mapped in below.
df.insert(loc=8, column="TOPICS", value="TBU")

# Drop duplicates
# Fresh posts are listed first so that, for a URL present in both sources,
# keep='first' retains the freshly loaded row over the spreadsheet row.
df = pd.concat([df, df_gsheet])
df = df.drop_duplicates("CONTENT_URL", keep='first').reset_index(drop=True)

# Add new topics
prompt = "Identify the main topics discussed in the content and provide a concise list in a string"
for row in df.itertuples():
    if row.CONTENT_URL in topics:
        continue
    # NOTE(review): `create_chat_completion` is defined outside this notebook;
    # it is called with the API key, the prompt and the post content -- confirm
    # its return type is the topic string expected in the sheet.
    topics[row.CONTENT_URL] = create_chat_completion(
        openai_api_key,
        prompt,
        row.CONTENT
    )
    # Save the mapping after each new topic so completed calls are not lost
    # if a later one fails.
    pdump(input_dir, topics, "topics")
df["TOPICS"] = df["CONTENT_URL"].map(topics)

print("Rows:", len(df))
df.head(1)

## Output

### Save data

In [None]:
# Save the consolidated dataframe in `output_dir` under the name `file_content`.
# NOTE(review): `pdump` is not defined in this notebook; presumably it is made
# available by the `naas_data_product` import -- confirm the on-disk format there.
pdump(output_dir, df, file_content)

### Send data to Google Sheets spreadsheet

In [None]:
# append=False: the sheet content is replaced by `df`, which already contains
# the rows previously read from the spreadsheet merged with the new posts.
spreadsheet = gsheet.connect(spreadsheet_url)
spreadsheet.send(sheet_name=sheet_name, data=df, append=False)