In [1]:
!pip install -qU langchain openai transformers openpyxl langchain_community semopy

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/1.6 MB[0m [31m18.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.2/69.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.3/472.3 kB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.9/250.9 kB[0m [31m18.3 MB/

In [2]:
# Mount Google Drive into the Colab filesystem so that files stored on Drive
# can be read through ordinary paths under the mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
import pandas as pd
import semopy
from sklearn.preprocessing import StandardScaler
from semopy import Model
from semopy import calc_stats
from langchain.tools import BaseTool
from math import pi
from typing import Union
import json
from typing import Optional

In [4]:
class SemopyTool(BaseTool):
    """LangChain tool that fits a fixed structural equation model with semopy.

    The tool reads the "Final Data" sheet of the processed workbook, z-scores
    a fixed list of numeric columns, fits the model described in `_run`, and
    returns the parameter table produced by `Model.inspect()` as JSON.
    """

    name: str = "SEM Correlation Finder"
    description: str = "Use this tool when you need to find direct and indirect correlations and latent variables. This gives you all positive and negative correlations."

    def _run(self, guidance: Optional[str] = None):
        """Fit the SEM and return its parameter estimates.

        Args:
            guidance: Free-text tool input supplied by the agent. It is accepted
                so the agent can call the tool, but it is not used.

        Returns:
            A JSON string (records orientation, 4-space indent) of the
            DataFrame returned by `Model.inspect()`.
        """
        # Build a new frame instead of dropping in place, so no intermediate
        # object is mutated.
        data = pd.read_excel(
            "/content/drive/My Drive/Processed_Data.xlsx", sheet_name="Final Data"
        ).drop(columns=["date", "confirmed"])

        # Only these columns are standardized; every other column is passed to
        # the model unchanged.
        numeric_columns = ['temperature', 'daylight_duration', 'sunshine_duration',
              'precipitation_sum', 'rain_sum', 'snowfall_sum', 'deaths', 'recovered','infected',
              'retail_and_recreation_percent_change',
              'grocery_and_pharmacy_percent_change', 'parks_percent_change',
              'transit_stations_percent_change', 'workplaces_percent_change',
              'residential_percent_change']

        # Scaling with z-scores (standardization)
        z_scaled_data = data.copy()
        z_scaled_data[numeric_columns] = StandardScaler().fit_transform(z_scaled_data[numeric_columns])

        # Model specification in semopy syntax: `=~` defines a latent variable,
        # `~` a regression and `~~` a (residual) covariance.
        desc="""
        # measurement model

        better_outdoor_environment =~ temperature + daylight_duration + sunshine_duration + precipitation_sum + rain_sum + snowfall_sum

        encouraging_outdoor_activities =~ better_outdoor_environment + school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection

        panic =~ infected + deaths

        awareness =~ infected + deaths + school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection

        # regressions

        infected ~ temperature + daylight_duration + sunshine_duration + precipitation_sum + rain_sum + snowfall_sum + better_outdoor_environment + encouraging_outdoor_activities + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        deaths ~ temperature + daylight_duration + sunshine_duration + precipitation_sum + rain_sum + snowfall_sum + better_outdoor_environment + encouraging_outdoor_activities + infected + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change

        school_closing ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        workplace_closing ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        gatherings_restrictions ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        transport_closing ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        stay_home_restrictions ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        internal_movement_restrictions ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        international_movement_restrictions ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        facial_coverings ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change
        elderly_people_protection ~ infected + deaths + panic + parks_percent_change + residential_percent_change + workplaces_percent_change + transit_stations_percent_change

        retail_and_recreation_percent_change ~ infected + deaths + school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection + panic + awareness + encouraging_outdoor_activities
        grocery_and_pharmacy_percent_change ~ infected + deaths + school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection + panic + awareness + encouraging_outdoor_activities
        parks_percent_change ~ infected + deaths + school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection + panic + awareness + encouraging_outdoor_activities
        transit_stations_percent_change ~ infected + deaths + school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection + panic + awareness + encouraging_outdoor_activities
        workplaces_percent_change ~ infected + deaths + school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection + panic + awareness + encouraging_outdoor_activities
        residential_percent_change ~ infected + deaths + school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection + panic + awareness + encouraging_outdoor_activities


        # residual correlations
        infected, deaths ~~ school_closing + workplace_closing + gatherings_restrictions + transport_closing + stay_home_restrictions + internal_movement_restrictions + international_movement_restrictions + facial_coverings + elderly_people_protection
        """

        mod = Model(desc)
        mod.fit(z_scaled_data, solver='SLSQP',options={'maxiter': 1000})

        # Only the parameter table is returned, so fit statistics are not
        # computed and inspect() is called exactly once.
        return mod.inspect().to_json(orient='records', indent=4)

    def _arun(self, guidance: Optional[str] = None):
        """Async entry point; not supported.

        Accepts the same argument as `_run` so that an async invocation that
        passes the tool input reaches the NotImplementedError below instead of
        failing earlier with a TypeError about unexpected arguments.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("This tool does not support async")

In [5]:
class MetaDataDescription(BaseTool):
    """LangChain tool that returns variable descriptions from the metadata workbook.

    Every sheet of the workbook is expected to have "Variable" and
    "Description" columns; the rows of all sheets are merged into a single
    mapping that is returned as JSON.
    """

    name: str = "Meta Data Detailing"
    description: str = """Use this tool when you need to understand about the data, provided in the SEM Correlation Finder. This tool gives the meta data about the data used for correlation finding.
    This explains about what does the parameters explain, what kind of restrictions are implemented and the encoding description for different level of restriction. """

    def _run(self, guidance: Optional[str] = None):
        """Collect the variable descriptions of every sheet into one JSON object.

        Args:
            guidance: Free-text tool input supplied by the agent. It is accepted
                so the agent can call the tool, but it is not used.

        Returns:
            A JSON string (4-space indent) mapping each variable name to its
            description. If a variable appears on several sheets, the entry
            from the last sheet wins. Rows without a variable name are skipped
            and a missing description is emitted as JSON null.

        Raises:
            KeyError: if a sheet lacks a "Variable" or "Description" column.
        """
        # sheet_name=None loads every sheet in a single pass and returns a
        # {sheet name: DataFrame} dict, so the workbook is opened once and no
        # ExcelFile handle is left unclosed.
        meta_df = pd.read_excel("/content/drive/My Drive/Meta Data.xlsx", sheet_name=None)

        descriptions = {}
        for sheet_frame in meta_df.values():
            for variable, description in zip(sheet_frame["Variable"], sheet_frame["Description"]):
                # Blank spreadsheet rows come back as NaN; serializing them
                # would yield a "NaN" key and a bare NaN token (invalid JSON).
                if pd.isna(variable):
                    continue
                descriptions[str(variable)] = None if pd.isna(description) else description

        return json.dumps(descriptions, indent=4)

    def _arun(self, guidance: Optional[str] = None):
        """Async entry point; not supported.

        Accepts the same argument as `_run` so that an async invocation that
        passes the tool input reaches the NotImplementedError below instead of
        failing earlier with a TypeError about unexpected arguments.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("This tool does not support async")

In [7]:
import os
from getpass import getpass
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory

# SECURITY: never store the API key in the notebook. A key that has been saved
# in a notebook must be treated as compromised and revoked in the OpenAI
# dashboard. The key is taken from the environment when present; otherwise it
# is requested interactively without being echoed or saved in the cell output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ").strip()

OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
if not OPENAI_API_KEY:
    # Fail here with a clear message rather than sending a placeholder string
    # to the API and getting an authentication error later.
    raise ValueError("OPENAI_API_KEY is empty; provide a valid OpenAI API key.")

# initialize LLM (we use ChatOpenAI because we'll later define a `chat` agent)
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
    model_name='gpt-3.5-turbo'
)
# initialize conversational memory (keeps the last k=5 exchanges)
conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)

In [8]:
from langchain.agents import initialize_agent, AgentType

# Tools the agent may call: the SEM fit and the metadata lookup.
tools = [SemopyTool(), MetaDataDescription()]

# initialize agent with tools
agent = initialize_agent(
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory
)


# Custom system message: steers the model to use the tools for any
# correlation question instead of answering from its own knowledge.
sys_msg = """Assistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

Unfortunately, Assistant is terrible at Correlation or covariance finding. When provided with correlation questions, no matter how simple, assistant always refers to it's trusty tools and absolutely does NOT try to answer correlation questions by itself.

Overall, Assistant is a powerful system that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.

This Assistant is focused to mimic human behaviour in finding the hidden patterns and trends using the tools, and generating detailed descriptions on what reasons have the events occured.
"""


new_prompt = agent.agent.create_prompt(
    system_message=sys_msg,
    tools=tools
)

# Building the prompt does not change the agent; it has to be installed on the
# agent's LLM chain, otherwise the default system message stays in effect.
agent.agent.llm_chain.prompt = new_prompt



  agent = initialize_agent(


In [9]:
# Ask the agent a question. `invoke` replaces calling the executor directly,
# which LangChain has deprecated; the question goes in under the "input" key.
agent.invoke({"input": "can you tell me how did the weather affected the infected rate"})

  agent("can you tell me how did the weather affected the infected rate")




[1m> Entering new AgentExecutor chain...[0m


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}