# 1.0 - Imports and Installs

In [1]:
%pip install geopandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
import tweepy
import time
import os 
from datetime import timedelta
from shapely.geometry import Point
from shapely import wkt
from dotenv import load_dotenv

# 2.0 - Token and Dataframe

In [3]:
# load_dotenv() makes the credentials available through os.getenv (python-dotenv)
load_dotenv()
# each tweepy.Client keyword below is filled from the environment variable paired with it;
# os.getenv yields None for a variable that is not set
client = tweepy.Client(
    **{
        keyword: os.getenv(env_name)
        for keyword, env_name in (
            ("bearer_token", "BEARER_TOKEN"),
            ("consumer_key", "API_KEY"),
            ("consumer_secret", "API_SECRET"),
            ("access_token", "ACCESS_TOKEN"),
            ("access_token_secret", "ACCESS_SECRET"),
        )
    }
)

In [4]:
# reading main dataframe/database
# tweet_id is read with dtype object so the ids stay as text; the "Unnamed: 0" column is dropped
df_main = pd.read_csv("df_y4s2_main.csv", dtype={"tweet_id":object}).drop(columns=["Unnamed: 0"])

# district polygons: csv -> dataframe
df_district = pd.read_csv("district.csv")
# the "geometry" column is stored as WKT text; parse it into geometry objects first
df_district['geometry'] = gpd.GeoSeries.from_wkt(df_district['geometry'])
# ...then build the GeoDataFrame once, from the parsed geometries
# (the earlier GeoDataFrame built from the raw strings was immediately overwritten, so it is removed)
geo_df = gpd.GeoDataFrame(df_district, geometry='geometry')

# reading google forms data (the spreadsheet is fetched as csv over the network)
url = "https://docs.google.com/spreadsheets/d/1ea_DF4ZtAvhvUUNM1Q57qeQov7Kp6lolCAZDxKKdNJ8/export?format=csv"
df_google_forms = pd.read_csv(url)

# 3.0 - Auto-reply

In [5]:
class AutoReply():
    """
    auto-reply workflow: pick the tweets that need a google forms link, reply to them,
    and merge the google forms responses back into the main dataframe
    Attributes:
    df_main:       main tweet dataframe, updated in place by create_tweet_reply and update_main_forms
    geo_district:  district polygons (column "geometry") with their names (column "dname_e")
    df_main_forms: accumulated google forms responses, extended by insert_form_data
    df_tweet_id:   work list ("tweet_id", "type") built by get_tweet_id; None until it has run
    Note:
    annotations that name geopandas / tweepy / shapely types are written as strings so that
    they are not evaluated when the class is defined
    """

    # upper bound used by get_tweet_id, both for the requested number of replies and
    # for the number of rows it marks as "False" in a single run
    MAX_TWEETS_PER_RUN = 25

    def __init__(self, df_main: pd.DataFrame, geo_district: "gpd.GeoDataFrame", df_main_forms: pd.DataFrame):
        """
        creates an auto-reply constructor
        Note:
        only needs to be run once or else data in df_main_forms will be lost
        """
        self.df_main       = df_main
        self.geo_district  = geo_district
        self.df_main_forms = df_main_forms
        self.df_tweet_id   = None

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _now() -> pd.Timestamp:
        """
        current time in the Asia/Vientiane timezone with the tzinfo removed
        """
        return pd.Timestamp.now(tz="Asia/Vientiane").replace(tzinfo=None)

    @staticmethod
    def _as_object_column(frame: pd.DataFrame, column: str) -> None:
        """
        make sure frame[column] has dtype object before mixed values are written to it
        Note:
        writing e.g. the string "False" into a datetime column relies on implicit upcasting,
        which newer pandas versions warn about or reject; columns that are missing are skipped
        """
        if column in frame.columns and frame[column].dtype != object:
            frame[column] = frame[column].astype(object)

    @staticmethod
    def _parse_latlon(latlon) -> "tuple[float, float] | None":
        """
        parse a "lat,lon" string into (latitude, longitude) floats
        returns None when the input is not a string, has no comma, or is not numeric
        """
        if not isinstance(latlon, str):
            return None
        parts = latlon.split(',')
        if len(parts) < 2:
            return None
        try:
            return float(parts[0]), float(parts[1])
        except ValueError:
            return None

    def _mark_no_response(self, tweet_id: str) -> None:
        """
        set "datetime_form_response" to the string "False" for every df_main row with this tweet_id
        """
        self._as_object_column(self.df_main, "datetime_form_response")
        self.df_main.loc[self.df_main["tweet_id"] == tweet_id, "datetime_form_response"] = "False"

    # ------------------------------------------------------------------
    # settings and main entry point
    # ------------------------------------------------------------------
    def set_reply_setting(self, no_of_tweets: int, msg: str, forms_to_csv: bool) -> None:
        """
        sets the auto_reply setting
        Note:
        no_of_tweets must be between 1 and 25 (get_tweet_id rejects anything else)
        """
        self.no_of_tweets_setting = no_of_tweets
        self.message_setting      = msg
        self.forms_to_csv         = forms_to_csv
        print("==================================================")
        print("reply_setting has been set successfully: ")
        print("no_of_tweets has been set to >", no_of_tweets)
        print("message has been set to      >", msg)
        print("forms_to_csv has been set to >", forms_to_csv)
        print("==================================================")

    # execute all auto-reply functions
    # running this will cause updates to the specifed dataframe
    # running this will use up tokens
    def main_auto_run(self, df_google_forms: pd.DataFrame) -> None:
        """
        the main auto-reply function: get_tweet_id -> create_tweet_reply -> insert_form_data -> update_main_forms,
        followed by an optional export of df_main_forms to "df_main_forms.csv"
        Note:
        requires set_reply_setting to be set; if any setting is missing an error is printed and nothing runs
        """
        # every setting must exist; the previous check was always true and crashed later on a missing attribute
        required_settings = ("no_of_tweets_setting", "message_setting", "forms_to_csv")
        if not all(hasattr(self, name) for name in required_settings):
            print("[Error] Please specify the auto-reply input with set_reply_setting")
            return None

        print("==================================================")
        print("Reply settings has been received")
        print("no_of_tweets is  >", self.no_of_tweets_setting)
        print("reply message is >", self.message_setting)
        print("forms_to_csv is  >", self.forms_to_csv)
        print("--------------------------------------------------")
        print("get_tweet_id is running:")
        self.get_tweet_id(self.no_of_tweets_setting)
        print("get_tweet_id has run successfully")
        print("--------------------------------------------------")
        print("create_tweet_reply is running:")
        self.create_tweet_reply(self.df_tweet_id, self.message_setting)
        print("create_tweet_reply has run successfully")
        print("--------------------------------------------------")
        print("insert_form_data is running:")
        self.insert_form_data(df_google_forms)
        print("insert_form_data has run successfully")
        print("--------------------------------------------------")
        print("update_main_forms is running:")
        self.update_main_forms()
        print("update_main_forms has run successfully")
        if self.forms_to_csv is True:
            print("--------------------------------------------------")
            print("exporting df_main_forms to csv:")
            self.df_main_forms.to_csv("df_main_forms.csv")
            print("df_main_forms has been exported successfully")
        print("==================================================")
        return None

    # ------------------------------------------------------------------
    # selecting tweets
    # ------------------------------------------------------------------
    def over_one_week(self, datetime_input: pd.Timestamp) -> bool:
        """
        check if input date is over one week
        Example:
        if date is over one week -> return True
        if date is not over one week -> return False
        """
        return (self._now() - datetime_input) > pd.Timedelta(days=7)

    def get_tweet_id(self, no_of_tweets: int) -> "pd.DataFrame | None":
        """
        build the dataframe of tweet_id and type (the df_main column / marker to update) and store it in self.df_tweet_id
        Categorical output types:
        "datetime_of_first_form", "datetime_of_second_form", "False"
        Rules, for rows whose datetime_form_response is empty:
        no form sent yet                           -> "datetime_of_first_form"
        first form sent over one week ago, no second -> "datetime_of_second_form"
        second form sent over one week ago         -> "False" (gave up waiting)
        Note:
        no_of_tweets only counts the first/second form rows; "False" rows are capped separately.
        returns None (after printing an error) when no_of_tweets is not within 1..MAX_TWEETS_PER_RUN
        """
        self.df_tweet_id = pd.DataFrame(columns=["tweet_id", "type"])
        limit = self.MAX_TWEETS_PER_RUN
        if no_of_tweets > limit:
            print("[Error] Number of tweets is over specified limit of", limit)
            return None
        if no_of_tweets <= 0:
            print("[Error] Please provide a postive non-zero value")
            return None

        records      = []   # collected once, turned into a dataframe after the loop
        reply_count  = 0    # rows that will receive a first/second form
        closed_count = 0    # rows that will be marked "False"
        # iterate by position so df_main does not need a 0..n-1 index
        rows = zip(self.df_main["tweet_id"],
                   self.df_main["datetime_form_response"],
                   self.df_main["datetime_of_first_form"],
                   self.df_main["datetime_of_second_form"])
        for tweet_id, form_response, first_form, second_form in rows:
            if reply_count >= no_of_tweets or closed_count >= limit:
                break
            if not pd.isna(form_response):
                # already answered, or already marked "False"
                continue
            no_first  = pd.isna(first_form)
            no_second = pd.isna(second_form)
            if no_first and no_second:
                # if no form sent for the first and second attemps, then send first forms
                records.append({"tweet_id": tweet_id, "type": "datetime_of_first_form"})
                reply_count += 1
            elif not no_first and no_second:
                # if first form sent over 1 week, then send second form
                if self.over_one_week(first_form):
                    records.append({"tweet_id": tweet_id, "type": "datetime_of_second_form"})
                    reply_count += 1
            elif self.over_one_week(second_form):
                # if second form sent over 1 week, then no response
                records.append({"tweet_id": tweet_id, "type": "False"})
                closed_count += 1

        self.df_tweet_id = pd.DataFrame(records, columns=["tweet_id", "type"])
        # the summary is printed on every successful run, including runs that stop early at a limit
        if self.df_tweet_id.empty:
            print("[-] No tweet id found")
        else:
            print(self.df_tweet_id[["type"]].rename(columns={"type":"[*] count response_type category"}).value_counts().to_string())
        return self.df_tweet_id

    # ------------------------------------------------------------------
    # twitter access
    # ------------------------------------------------------------------
    def get_tweet_info(self, tweet_id: str) -> "dict | tweepy.Response":
        """
        get information about the tweet with tweet_id with tweepy
        Note:
        uses the notebook-level `client` object
        """
        return client.get_tweets(ids=tweet_id)

    def is_tweet_exists(self, tweet_id: str) -> bool:
        """
        check if tweet exist with tweet_id with tweepy
        the tweet counts as existing when the lookup response carries no errors
        """
        output = self.get_tweet_info(tweet_id)
        return len(output.errors) == 0

    # WARNING: this will use up token
    # WARNING: this will create real tweet
    def create_tweet(self, tweet_id: str, msg: str) -> None:
        """
        create a reply with tweepy
        Note:
        the actual API call is commented out on purpose, so this is currently a no-op
        """
        # uncomment below to create_tweet
        # WARNING: this will use up token
        # print("[WARNING] client.create_tweet() is being used")
        # client.create_tweet(text=msg, in_reply_to_tweet_id=tweet_id)
        pass


    def create_tweet_reply(self, df_tweet_id: pd.DataFrame, msg: str) -> None:
        """
        this will update self.df_main and send out the tweets
        Input:
        df_tweet_id: input should be from get_tweet_id method, it takes "tweet_id" and "type"
        msg: message to reply to twitter users
        Behaviour per row:
        type "False"            -> datetime_form_response is set to "False", nothing is sent
        tweet exists            -> reply is sent and the column named by "type" is set to the current time
        tweet does not exist    -> datetime_form_response is set to "False"
        """
        # check the dataframe that was actually passed in (it may be None if get_tweet_id never ran)
        if df_tweet_id is None or len(df_tweet_id) == 0:
            print("[-] No tweet id found. 0 Messages sent")
            return None
        for tweet_id, reply_type in zip(df_tweet_id["tweet_id"], df_tweet_id["type"]):
            if reply_type == "False":
                self._mark_no_response(tweet_id)
            elif self.is_tweet_exists(tweet_id):
                self.create_tweet(tweet_id, msg)
                print("[+] Message has been sent to tweet_id:", tweet_id, "message:", msg)
                # reply_type is the name of the df_main column that records when the form was sent
                self.df_main.loc[self.df_main["tweet_id"] == tweet_id, reply_type] = self._now()
            else:
                print("[-] twitter_id:", tweet_id, "is not found. datetime_form_response has been set to False")
                self._mark_no_response(tweet_id)
        return None

    # ------------------------------------------------------------------
    # coordinates and districts
    # ------------------------------------------------------------------
    def to_geo_point(self, latlon: str) -> "Point | None":
        """
        converts string latlong ("lat,lon") to a shapely Point(longitude, latitude)
        returns None (after printing an error) when latlon cannot be parsed
        """
        coordinates = self._parse_latlon(latlon)
        if coordinates is None:
            print("[Error] Invalid latlon format")
            return None
        latitude, longitude = coordinates
        # shapely points are (x, y), i.e. (longitude, latitude)
        return Point(longitude, latitude)

    def to_district(self, latlon: str) -> str:
        """
        converts string latlong to word-formatted district, or False (given that latlon is outside Bangkok)
        Output:
        potential output ["to-be-filled", "False", <one of the districts in geo_district>]
        "to-be-filled" is returned when latlon is missing or cannot be parsed as "lat,lon"
        """
        coordinates = self._parse_latlon(latlon)
        if coordinates is None:
            return "to-be-filled"
        latitude, longitude = coordinates
        geo_point = Point(longitude, latitude)
        # check every polygon in geo_district instead of assuming exactly 50 rows
        for polygon, district_name in zip(self.geo_district["geometry"], self.geo_district["dname_e"]):
            if polygon.contains(geo_point):
                return district_name
        return "False"

    def to_district_list(self, latlon_list: "list[str]") -> "list[str]":
        """
        converts a list of string latlong to word-formatted districts (see to_district)
        Output:
        potential output per element ["to-be-filled", "False", <one of the districts in geo_district>]
        sample input >> ["13.7563, 100.5018", "13.7246, 100.3522", "13.6801, 100.5682"]
        sample output >> ['Phra Nakhon', 'Nong Khaem', 'False']
        """
        # to_district takes the raw "lat,lon" string, not a Point
        return [self.to_district(latlon) for latlon in latlon_list]

    # ------------------------------------------------------------------
    # google forms
    # ------------------------------------------------------------------
    def is_username_exists(self, username_input: str) -> bool:
        """
        check if username exists in self.df_main to match with google forms data
        """
        return bool(self.df_main["username"].isin([username_input]).any())

    def get_username_index(self, username_input: str) -> "int | None":
        """
        give the df_main index label of the username's row with the largest datetime_of_tweet (idxmax),
        and return None (after printing a message) if username is not found
        NOTE(review): the previous docstring said "earliest" while the code has always used idxmax;
        the behaviour is left unchanged here - confirm which one is intended
        """
        if not self.is_username_exists(username_input):
            print("[-] username:", username_input, "not found")
            return None
        is_user = self.df_main["username"] == username_input
        return self.df_main.loc[is_user, "datetime_of_tweet"].idxmax()

    def insert_form_data(self, df_google_forms: pd.DataFrame) -> None:
        """
        concat extra rows in df_google_forms to the last index of df_main_forms
        Note:
        rows are matched by position: only the rows of df_google_forms beyond the current
        length of df_main_forms are appended, and the result gets a fresh 0..n-1 index
        """
        current_rows = self.df_main_forms.shape[0]
        if current_rows >= df_google_forms.shape[0]:
            print("[-] No insert has been made")
            return None
        no_of_rows_updated = df_google_forms.shape[0] - current_rows
        new_rows = df_google_forms.iloc[current_rows:, :]
        self.df_main_forms = pd.concat([self.df_main_forms, new_rows], axis=0, ignore_index=True)
        print("[+]",no_of_rows_updated,"rows df_google_forms has been inserted")
        return None

    def update_main_forms(self) -> None:
        """
        updates the df_main by checking for existing username and marks "added_to_db" to "True/False" whenever the username is found or not found
        Note:
        only rows whose "added_to_db" is still empty are processed; for a matching username the row
        returned by get_username_index receives the form data and the district from to_district
        """
        # the target columns receive strings / booleans, so make sure they can hold them
        self._as_object_column(self.df_main_forms, "added_to_db")
        for column in ("image_link", "geo_coordinate", "geo_true_district", "geo_address"):
            self._as_object_column(self.df_main, column)

        no_of_rows_updated = 0
        for position in range(len(self.df_main_forms)):
            form_row = self.df_main_forms.iloc[position]
            if not pd.isna(form_row["added_to_db"]):
                continue
            no_of_rows_updated += 1
            username = form_row["username"]
            found = self.is_username_exists(username)
            if found:
                num_index = self.get_username_index(username)
                self.df_main.loc[num_index, "datetime_form_response"] = self._now()
                self.df_main.loc[num_index, "image_link"]             = form_row["image_link"]
                self.df_main.loc[num_index, "geo_coordinate"]         = form_row["geo_coordinate"]
                self.df_main.loc[num_index, "geo_true_district"]      = self.to_district(form_row["geo_coordinate"])
                self.df_main.loc[num_index, "geo_address"]            = form_row["geo_address"]
            else:
                print("[-] username:", username, "not found")
            # write directly into the frame; chained assignment (df[col][i] = ...) may act on a copy
            added_column = self.df_main_forms.columns.get_loc("added_to_db")
            self.df_main_forms.iloc[position, added_column] = found
        print("[*]",no_of_rows_updated,"rows self.df_main_forms has been updated")
        return None

# 4.0 - Test Cases

## 4.1 - Test case functions

In [6]:
def create_test_df_main(df_input: pd.DataFrame) -> pd.DataFrame:
    """
    build the 10-row main-dataframe test fixture from the first rows of df_input
    Input:
    df_input: needs the columns "username", "tweet_id", "datetime_of_tweet" (df_input itself is not modified)
    Output:
    dataframe with the three input columns plus empty form/geo columns, where rows 1-9 are
    overwritten with the scenarios listed next to each assignment below
    Note:
    every relative timestamp is derived from one single "now", so the offsets between rows are exact
    """
    now = pd.Timestamp.now(tz="Asia/Vientiane").replace(tzinfo=None)

    df_output = df_input[["username", "tweet_id", "datetime_of_tweet"]].copy()
    df_output["username"]          = df_output["username"].astype(str)
    df_output["datetime_of_tweet"] = pd.to_datetime(df_output["datetime_of_tweet"])

    # empty datetime columns
    for column in ("datetime_of_first_form", "datetime_of_second_form", "datetime_form_response"):
        df_output[column] = None
        df_output[column] = pd.to_datetime(df_output[column])
    # datetime_form_response holds timestamps AND the string "False", so it is stored as object
    # up front instead of relying on pandas to upcast the datetime column implicitly
    df_output["datetime_form_response"] = df_output["datetime_form_response"].astype(object)

    # empty google forms columns
    for column in ("image_link", "geo_coordinate", "geo_true_district", "geo_address"):
        df_output[column] = None

    df_output = df_output.head(10).reset_index(drop=True)

    df_output.at[1, "datetime_of_first_form"]  = now - pd.Timedelta(days=6) # no reponse in 6 days (under 1 week); first
    df_output.at[2, "datetime_of_first_form"]  = now - pd.Timedelta(days=8) # no reponse in 8 days (over 1 week); first
    df_output.at[3, "datetime_of_second_form"] = now - pd.Timedelta(days=6) # no reponse in 6 days (under 1 week); second
    df_output.at[4, "datetime_of_second_form"] = now - pd.Timedelta(days=8) # no reponse in 8 days (over 1 week); second
    df_output.at[5, "datetime_of_first_form"]  = now - pd.Timedelta(days=6) # impossible case
    df_output.at[5, "datetime_of_second_form"] = now - pd.Timedelta(days=8) # impossible case
    df_output.at[6, "datetime_form_response"]  = "False"                    # no reponse
    df_output.at[7, "datetime_form_response"]  = now                        # form reponse
    df_output.at[8, "tweet_id"] = "8888888888888888888"                     # no tweet exist
    df_output.at[9, "username"] = "SampleNotRealUsername"                   # no username exist

    return df_output

def create_test_df_response() -> pd.DataFrame:
    """
    build the 7-row google-forms-response test fixture
    Output:
    dataframe (index 0..6, all columns dtype object) with the columns
    "username", "image_link", "geo_coordinate", "geo_address", "datetime_form_response", "added_to_db"
    Note:
    the frame is built from records in one go; filling a NaN frame cell by cell with
    df[col][i] = value is chained assignment, which may write to a temporary copy
    """
    now     = pd.Timestamp.now(tz="Asia/Vientiane").replace(tzinfo=None)
    pending = np.nan  # "added_to_db" not decided yet

    columns = ["username", "image_link", "geo_coordinate", "geo_address", "datetime_form_response", "added_to_db"]
    rows = [
        # username exists and coordinate in bkk
        ("amindichotomy_", "image-link-0", "13.7563,100.5018", "geo-address-0", now, pending),             # Phra Nakhon
        # username NOT exists and coordinate in bkk
        ("thisIsNotRealUsername123", "image-link-1", "13.7246,100.3522", "geo-address-1", now, pending),   # Nong Khaem
        # username exists and coordinate NOT in bkk
        ("SampleNotRealUsername", "image-link-2", "13.6801,100.5682", "geo-address-2", now, pending),      # Not in BKK
        # username NOT exists and coordinate NOT in bkk
        ("anotherNotRealUsername123", "image-link-3", "13.044167189490327,100.66544849518354", "geo-address-3", now, pending),  # Not in BKK
        # added to db FAILED
        ("BangkokInsight", "image-link-0", "13.7563,100.5018", "geo-address-0", now, False),               # Phra Nakhon
        # added to db
        ("BangkokInsight", "image-link-0", "13.7563,100.5018", "geo-address-0", now, True),                # Phra Nakhon
        # "to-be-filled"
        ("nongyooza", "to-be-filled", "to-be-filled", "to-be-filled", now, pending),
    ]
    # dtype=object keeps timestamps, strings, booleans and NaN side by side without coercion
    return pd.DataFrame(rows, columns=columns, dtype=object)

def create_test_df_main_forms() -> pd.DataFrame:
    """
    build the empty forms dataframe used as the starting df_main_forms:
    no rows, only the six google-forms columns
    """
    form_columns = [
        "username",
        "image_link",
        "geo_coordinate",
        "geo_address",
        "datetime_form_response",
        "added_to_db",
    ]
    return pd.DataFrame(columns=form_columns)


## 4.2 - Initializing test cases

In [None]:
# build the three test fixtures in one step (evaluated left to right):
# main dataframe, google forms responses, and the empty df_main_forms
df_main_test, df_response_test, df_main_forms = (
    create_test_df_main(df_main),
    create_test_df_response(),
    create_test_df_main_forms(),
)

In [8]:
# display the main-dataframe test fixture returned by create_test_df_main
df_main_test

Unnamed: 0,username,tweet_id,datetime_of_tweet,datetime_of_first_form,datetime_of_second_form,datetime_form_response,image_link,geo_coordinate,geo_true_district,geo_address
0,amindichotomy_,1759934702352560249,2024-02-20 13:35:05,NaT,NaT,NaT,,,,
1,BangkokInsight,1759866626374254816,2024-02-20 09:04:35,2024-04-05 17:50:58.809488,NaT,NaT,,,,
2,sukkkkkii,1759820082002497768,2024-02-20 05:59:38,2024-04-03 17:50:58.809488,NaT,NaT,,,,
3,_JiggYPuFF_,1759740436422492316,2024-02-20 00:43:09,NaT,2024-04-05 17:50:58.809488,NaT,,,,
4,_prapat_,1759622140071170212,2024-02-19 16:53:05,NaT,2024-04-03 17:50:58.809488,NaT,,,,
5,poonarak4,1759571859065262082,2024-02-19 13:33:17,2024-04-05 17:50:58.809488,2024-04-03 17:50:58.809488,NaT,,,,
6,3f02243c0b72413,1759563889417834925,2024-02-19 13:01:37,NaT,NaT,False,,,,
7,Ma_Mon,1759485019478007917,2024-02-19 07:48:12,NaT,NaT,2024-04-11 17:50:58.817510,,,,
8,nongyooza,8888888888888888888,2024-02-19 07:08:51,NaT,NaT,NaT,,,,
9,SampleNotRealUsername,1759432073696379139,2024-02-19 04:17:49,NaT,NaT,NaT,,,,


In [9]:
# display the google-forms-response test fixture returned by create_test_df_response
df_response_test

Unnamed: 0,username,image_link,geo_coordinate,geo_address,datetime_form_response,added_to_db
0,amindichotomy_,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 17:50:58.817510,
1,thisIsNotRealUsername123,image-link-1,"13.7246,100.3522",geo-address-1,2024-04-11 17:50:58.817510,
2,SampleNotRealUsername,image-link-2,"13.6801,100.5682",geo-address-2,2024-04-11 17:50:58.817510,
3,anotherNotRealUsername123,image-link-3,"13.044167189490327,100.66544849518354",geo-address-3,2024-04-11 17:50:58.817510,
4,BangkokInsight,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 17:50:58.817510,False
5,BangkokInsight,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 17:50:58.826018,True
6,nongyooza,to-be-filled,to-be-filled,to-be-filled,2024-04-11 17:50:58.826018,


In [10]:
# display the forms dataframe returned by create_test_df_main_forms (columns only, no rows)
df_main_forms

Unnamed: 0,username,image_link,geo_coordinate,geo_address,datetime_form_response,added_to_db


# 5.0 - Walkthrough Auto-reply

In [17]:
# create the AutoReply instance from the test fixtures (df_main_test, df_main_forms)
# and the district GeoDataFrame (geo_df)

ar = AutoReply(df_main = df_main_test, geo_district = geo_df, df_main_forms = df_main_forms)

In [18]:
# sample main dataframe

#         column_name          |            data_type              |         details
# ["username"]                  object (string)
# ["tweet_id"]                  object (string)
# ["datetime_of_tweet"]         datetime64[ns]                      datetime upon creating the tweet
# ["datetime_of_first_form"]    datetime64[ns]                      datetime upon sending the forms the first time
# ["datetime_of_second_form"]   datetime64[ns]                      datetime upon sending the forms the second time
# ["datetime_form_response"]    datetime64[ns] AND object (string)  datetime upon matching data from google forms to main database, OR the string "False" once the second form is over 7 days old or the tweet cannot be found
# ["image_link"]                object (string)                     google drive image link from google forms
# ["geo_coordinate"]            object (string)                     coordinate in lat and lon from google forms
# ["geo_true_district"]         object (string)                     generated by method "to_district"; possible values: 1. district name, 2. "False" (when geo_coordinate is outside bkk), 3. "to-be-filled"
# ["geo_address"]               object (string)                     word formatted address from google forms

# add any missing columns, following the data_type specified above

ar.df_main

Unnamed: 0,username,tweet_id,datetime_of_tweet,datetime_of_first_form,datetime_of_second_form,datetime_form_response,image_link,geo_coordinate,geo_true_district,geo_address
0,amindichotomy_,1759934702352560249,2024-02-20 13:35:05,NaT,NaT,NaT,,,,
1,BangkokInsight,1759866626374254816,2024-02-20 09:04:35,2024-04-05 04:54:38.917736,NaT,NaT,,,,
2,sukkkkkii,1759820082002497768,2024-02-20 05:59:38,2024-04-03 04:54:38.922174,NaT,NaT,,,,
3,_JiggYPuFF_,1759740436422492316,2024-02-20 00:43:09,NaT,2024-04-05 04:54:38.922174,NaT,,,,
4,_prapat_,1759622140071170212,2024-02-19 16:53:05,NaT,2024-04-03 04:54:38.922174,NaT,,,,
5,poonarak4,1759571859065262082,2024-02-19 13:33:17,2024-04-05 04:54:38.923179,2024-04-03 04:54:38.923179,NaT,,,,
6,3f02243c0b72413,1759563889417834925,2024-02-19 13:01:37,NaT,NaT,False,,,,
7,Ma_Mon,1759485019478007917,2024-02-19 07:48:12,NaT,NaT,2024-04-11 04:54:38.925185,,,,
8,nongyooza,8888888888888888888,2024-02-19 07:08:51,NaT,NaT,NaT,,,,
9,SampleNotRealUsername,1759432073696379139,2024-02-19 04:17:49,NaT,NaT,NaT,,,,


In [19]:
# function get_tweet_id():
# create a dataframe of tweet_id and the type of response to be sent to users
# (the result is also stored in ar.df_tweet_id)

# input:
# 1. number of tweet_id to be replied to (type: int; limit: 25 users)

# output:
# 1. (type: pandas dataframe with the columns "tweet_id" and "type")

ar.get_tweet_id(5)

[*] count response_type category
datetime_of_first_form              3
False                               2
datetime_of_second_form             1


Unnamed: 0,tweet_id,type
0,1759934702352560249,datetime_of_first_form
1,1759820082002497768,datetime_of_second_form
2,1759622140071170212,False
3,1759571859065262082,False
4,8888888888888888888,datetime_of_first_form
5,1759432073696379139,datetime_of_first_form


In [20]:
# WARNING: running this will use token           (function: get_tweet_info)
# WARNING: running this will create a real tweet (function: create_tweet; only once the client.create_tweet call inside it is uncommented)
# WARNING: running this will create changes to the specified dataframe

# function create_tweet_reply():
# create replies based on the provided dataframe of tweet_id AND type of form response

# input:
# 1. the dataframe of tweet_id and type of response to be sent to users (type: pandas dataframe)
# 2. customizable message to send to the user (type: string; note: please do include the google forms link)

# output:
# 1. No return output
# 2. Printed message of both successful and unsuccessful attempts

msg = "ทดสอบส่งข้อความอัตโนมัติ + google_forms_link"
ar.create_tweet_reply(ar.df_tweet_id, msg)

[+] Message has been sent to tweet_id: 1759934702352560249 message: ทดสอบส่งข้อความอัตโนมัติ + google_forms_link
[+] Message has been sent to tweet_id: 1759820082002497768 message: ทดสอบส่งข้อความอัตโนมัติ + google_forms_link
[-] twitter_id: 8888888888888888888 is not found. datetime_form_response has been set to False
[+] Message has been sent to tweet_id: 1759432073696379139 message: ทดสอบส่งข้อความอัตโนมัติ + google_forms_link


In [21]:
# checking the updated dataframe
ar.df_main

# explanation based on the provided test case
# at index 0: an update has been made to "datetime_of_first_form" since no form has been sent according to the record
# at index 1: NO update has been made, since "datetime_of_first_form" is UNDER 1 week
# at index 2: an update has been made to "datetime_of_second_form" since "datetime_of_first_form" is OVER 1 week
# at index 3: NO update has been made, since "datetime_of_second_form" is UNDER 1 week
# at index 4: an update has been made to "datetime_form_response" since "datetime_of_second_form" is OVER 1 week
# at index 5: an update has been made to "datetime_form_response" since "datetime_of_second_form" is OVER 1 week NOTE: datetime_of_second_form takes priority OVER datetime_of_first_form
# at index 6: NO update has been made since "datetime_form_response" has been set to False; by default get_tweet_id() will SKIP this row
# at index 7: NO update has been made since "datetime_form_response" has a RECORD of response as datetime; by default get_tweet_id() will SKIP this row
# at index 8: an update has been made to "datetime_form_response" since tweet_id does NOT exist
# at index 9: same as index 0

Unnamed: 0,username,tweet_id,datetime_of_tweet,datetime_of_first_form,datetime_of_second_form,datetime_form_response,image_link,geo_coordinate,geo_true_district,geo_address
0,amindichotomy_,1759934702352560249,2024-02-20 13:35:05,2024-04-11 04:55:31.517435,NaT,NaT,,,,
1,BangkokInsight,1759866626374254816,2024-02-20 09:04:35,2024-04-05 04:54:38.917736,NaT,NaT,,,,
2,sukkkkkii,1759820082002497768,2024-02-20 05:59:38,2024-04-03 04:54:38.922174,2024-04-11 04:55:31.767721,NaT,,,,
3,_JiggYPuFF_,1759740436422492316,2024-02-20 00:43:09,NaT,2024-04-05 04:54:38.922174,NaT,,,,
4,_prapat_,1759622140071170212,2024-02-19 16:53:05,NaT,2024-04-03 04:54:38.922174,False,,,,
5,poonarak4,1759571859065262082,2024-02-19 13:33:17,2024-04-05 04:54:38.923179,2024-04-03 04:54:38.923179,False,,,,
6,3f02243c0b72413,1759563889417834925,2024-02-19 13:01:37,NaT,NaT,False,,,,
7,Ma_Mon,1759485019478007917,2024-02-19 07:48:12,NaT,NaT,2024-04-11 04:54:38.925185,,,,
8,nongyooza,8888888888888888888,2024-02-19 07:08:51,NaT,NaT,False,,,,
9,SampleNotRealUsername,1759432073696379139,2024-02-19 04:17:49,2024-04-11 04:55:32.223292,NaT,NaT,,,,


In [22]:
# sample dataframe google forms result

#         column_name    |        data_type           |                details
# ["username"]                  object (string)          user-submitted username used to link to main dataframe
# ["image_link"]                object (string)          google drive link to view the image
# ["geo_coordinate"]            object (string)          geo_coordinate in lat lon generated from google app script
# ["geo_address"]               object (string)          word formatted address generated from google app script
# ["datetime_form_response"]    object (string)          datetime upon user submitting the google forms

# brief explanation of each row
# at index 0: username EXISTS,         coordinate IS in bkk
# at index 1: username does NOT exist, coordinate IS in bkk
# at index 2: username EXISTS,         coordinate NOT in bkk
# at index 3: username does NOT exist, coordinate NOT in bkk
# at index 4: "added_to_db" is already False
# at index 5: "added_to_db" is already True
# at index 6: form fields are "to-be-filled"

df_response_test

Unnamed: 0,username,image_link,geo_coordinate,geo_address,datetime_form_response,added_to_db
0,amindichotomy_,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.929220,
1,thisIsNotRealUsername123,image-link-1,"13.7246,100.3522",geo-address-1,2024-04-11 04:54:38.930221,
2,SampleNotRealUsername,image-link-2,"13.6801,100.5682",geo-address-2,2024-04-11 04:54:38.930221,
3,anotherNotRealUsername123,image-link-3,"13.044167189490327,100.66544849518354",geo-address-3,2024-04-11 04:54:38.930221,
4,BangkokInsight,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.931221,False
5,BangkokInsight,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.931221,True
6,nongyooza,to-be-filled,to-be-filled,to-be-filled,2024-04-11 04:54:38.931221,


In [23]:
# insert google forms to df_main_forms
# insert_form_data only appends the rows of its input that lie beyond the current
# number of rows in ar.df_main_forms, so the order of these two calls matters

ar.insert_form_data(df_response_test)
ar.insert_form_data(df_google_forms)

[+] 7 rows df_google_forms has been inserted
[+] 4 rows df_google_forms has been inserted


In [24]:
# check whether df_main_forms has been updated

ar.df_main_forms

Unnamed: 0,username,image_link,geo_coordinate,geo_address,datetime_form_response,added_to_db
0,amindichotomy_,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.929220,
1,thisIsNotRealUsername123,image-link-1,"13.7246,100.3522",geo-address-1,2024-04-11 04:54:38.930221,
2,SampleNotRealUsername,image-link-2,"13.6801,100.5682",geo-address-2,2024-04-11 04:54:38.930221,
3,anotherNotRealUsername123,image-link-3,"13.044167189490327,100.66544849518354",geo-address-3,2024-04-11 04:54:38.930221,
4,BangkokInsight,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.931221,False
5,BangkokInsight,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.931221,True
6,nongyooza,to-be-filled,to-be-filled,to-be-filled,2024-04-11 04:54:38.931221,
7,sth_wong,https://drive.google.com/open?id=1db5A3JRrvUAa...,"13.737521942343397,100.5257741906242",254 จุฬาลงกรณ์มหาวิทยาลัย ซ. จุฬาลงกรณ์ 9 Khwa...,3/19/2024 23:03:11,
8,test_friday_22,https://drive.google.com/open?id=1db5A3JRrvUAa...,"13.73683903630348,100.53311852791562","254 Henri Dunant Rd, Khwaeng Pathum Wan, Pathu...",3/22/2024 13:11:20,
9,test-new-col,https://drive.google.com/open?id=1db5A3JRrvUAa...,"13.751222305224355,100.54133227300414","419/8 Ratchaprarop Rd, Khwaeng Makkasan, Khet ...",3/22/2024 15:53:14,


In [25]:
# WARNING: running this will modify the specified dataframe

# function update_main_forms():
# updates the specified dataframe (in most cases, the main dataframe) from the google forms response dataframe
# converts "geo_coordinate" to "geo_true_district", i.e. checks which bkk district (if any) contains the provided coordinate

# input:
# 1. None

# output:
# 1. No return output
# 2. Printed message for each username not found in the main dataframe (reason: the user most likely typed their username incorrectly)

ar.update_main_forms()

[-] username: thisIsNotRealUsername123 not found
[-] username: anotherNotRealUsername123 not found
[-] username: sth_wong not found
[-] username: test_friday_22 not found
[-] username: test-new-col not found
[-] username: pp_big_4 not found
[*] 9 rows self.df_main_forms has been updated


In [26]:
# checking the updated dataframe
ar.df_main

# explanation based on the provided test case
# at index 0: username exists, coordinate is in bkk; so "datetime_form_response" has been updated to the current time and "geo_true_district" is NOT False
# at index 9: username exists, coordinate is NOT in bkk; so "datetime_form_response" has been updated to the current time and "geo_true_district" is False
# no row: username does NOT exist, coordinate is in bkk; so no changes have been made
# no row: username does NOT exist, coordinate is NOT in bkk; so no changes have been made either
# NOTE(review): the saved output below shows "geo_true_district" = False at index 0, which contradicts
# the expectation above -- confirm whether the district lookup is wrong or the saved output is stale

Unnamed: 0,username,tweet_id,datetime_of_tweet,datetime_of_first_form,datetime_of_second_form,datetime_form_response,image_link,geo_coordinate,geo_true_district,geo_address
0,amindichotomy_,1759934702352560249,2024-02-20 13:35:05,2024-04-11 04:55:31.517435,NaT,2024-04-11 04:56:01.701025,image-link-0,"13.7563,100.5018",False,geo-address-0
1,BangkokInsight,1759866626374254816,2024-02-20 09:04:35,2024-04-05 04:54:38.917736,NaT,NaT,,,,
2,sukkkkkii,1759820082002497768,2024-02-20 05:59:38,2024-04-03 04:54:38.922174,2024-04-11 04:55:31.767721,NaT,,,,
3,_JiggYPuFF_,1759740436422492316,2024-02-20 00:43:09,NaT,2024-04-05 04:54:38.922174,NaT,,,,
4,_prapat_,1759622140071170212,2024-02-19 16:53:05,NaT,2024-04-03 04:54:38.922174,False,,,,
5,poonarak4,1759571859065262082,2024-02-19 13:33:17,2024-04-05 04:54:38.923179,2024-04-03 04:54:38.923179,False,,,,
6,3f02243c0b72413,1759563889417834925,2024-02-19 13:01:37,NaT,NaT,False,,,,
7,Ma_Mon,1759485019478007917,2024-02-19 07:48:12,NaT,NaT,2024-04-11 04:54:38.925185,,,,
8,nongyooza,8888888888888888888,2024-02-19 07:08:51,NaT,NaT,2024-04-11 04:56:01.721494,to-be-filled,to-be-filled,to-be-filled,to-be-filled
9,SampleNotRealUsername,1759432073696379139,2024-02-19 04:17:49,2024-04-11 04:55:32.223292,NaT,2024-04-11 04:56:01.711312,image-link-2,"13.6801,100.5682",False,geo-address-2


In [27]:
# inspect ar.df_main_forms to confirm that "added_to_db" has been filled in

ar.df_main_forms

Unnamed: 0,username,image_link,geo_coordinate,geo_address,datetime_form_response,added_to_db
0,amindichotomy_,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.929220,True
1,thisIsNotRealUsername123,image-link-1,"13.7246,100.3522",geo-address-1,2024-04-11 04:54:38.930221,False
2,SampleNotRealUsername,image-link-2,"13.6801,100.5682",geo-address-2,2024-04-11 04:54:38.930221,True
3,anotherNotRealUsername123,image-link-3,"13.044167189490327,100.66544849518354",geo-address-3,2024-04-11 04:54:38.930221,False
4,BangkokInsight,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.931221,False
5,BangkokInsight,image-link-0,"13.7563,100.5018",geo-address-0,2024-04-11 04:54:38.931221,True
6,nongyooza,to-be-filled,to-be-filled,to-be-filled,2024-04-11 04:54:38.931221,True
7,sth_wong,https://drive.google.com/open?id=1db5A3JRrvUAa...,"13.737521942343397,100.5257741906242",254 จุฬาลงกรณ์มหาวิทยาลัย ซ. จุฬาลงกรณ์ 9 Khwa...,3/19/2024 23:03:11,False
8,test_friday_22,https://drive.google.com/open?id=1db5A3JRrvUAa...,"13.73683903630348,100.53311852791562","254 Henri Dunant Rd, Khwaeng Pathum Wan, Pathu...",3/22/2024 13:11:20,False
9,test-new-col,https://drive.google.com/open?id=1db5A3JRrvUAa...,"13.751222305224355,100.54133227300414","419/8 Ratchaprarop Rd, Khwaeng Makkasan, Khet ...",3/22/2024 15:53:14,False


# 6.0 - Auto Run Auto-reply

In [11]:
# instantiate AutoReply with the test main dataframe, the district geometries and the main forms dataframe

ar = AutoReply(
    df_main=df_main_test,
    geo_district=geo_df,
    df_main_forms=df_main_forms,
)

In [12]:
# reply text used for the automatic replies
msg = "ทดสอบส่งข้อความอัตโนมัติ + google_forms_link"

# configure the auto-reply run (set_reply_setting documents that no_of_tweets must be under 25)
ar.set_reply_setting(
    no_of_tweets=5,
    msg=msg,
    forms_to_csv=False,
)

reply_setting has been set successfully: 
no_of_tweets has been set to > 5
message has been set to      > ทดสอบส่งข้อความอัตโนมัติ + google_forms_link
forms_to_csv has been set to > False


In [13]:
# run the whole auto-reply flow, using the test form responses as the google forms input
# NOTE(review): judging by the log printed below, this sends real replies to the listed tweet ids -- confirm before re-running
ar.main_auto_run(df_google_forms=df_response_test)

Reply settings has been received
no_of_tweets is  > 5
reply message is > ทดสอบส่งข้อความอัตโนมัติ + google_forms_link
forms_to_csv is  > False
--------------------------------------------------
get_tweet_id is running:
[*] count response_type category
datetime_of_first_form              3
False                               2
datetime_of_second_form             1
get_tweet_id has run successfully
--------------------------------------------------
create_tweet_reply is running:
[+] Message has been sent to tweet_id: 1759934702352560249 message: ทดสอบส่งข้อความอัตโนมัติ + google_forms_link
[+] Message has been sent to tweet_id: 1759820082002497768 message: ทดสอบส่งข้อความอัตโนมัติ + google_forms_link
[-] twitter_id: 8888888888888888888 is not found. datetime_form_response has been set to False
[+] Message has been sent to tweet_id: 1759432073696379139 message: ทดสอบส่งข้อความอัตโนมัติ + google_forms_link
create_tweet_reply has run successfully
--------------------------------------------