In [1]:
import os

import pandas as pd
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

from src.workflow import Workflow

load_dotenv()

genai_model=ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    google_api_key=os.environ["GOOGLE_API_KEY"]
)
builder = await Workflow.create(genai_model)

2025-07-17 16:37:15,506 - src.workflow - INFO - ✅ TOOL: scatter_plot
2025-07-17 16:37:15,507 - src.workflow - INFO - ✅ TOOL: query
2025-07-17 16:37:15,508 - src.workflow - INFO - ✅ TOOL: get_schema_info
2025-07-17 16:37:15,508 - src.workflow - INFO - ✅ TOOL: get_metrics_info
2025-07-17 16:37:15,509 - src.workflow - INFO - ✅ TOOL: get_tables_list
2025-07-17 16:37:15,509 - src.workflow - INFO - ✅ TOOL: PCA_tool
2025-07-17 16:37:15,510 - src.workflow - INFO - ✅ TOOL: similarity_tool


In [2]:
from langchain_core.messages import HumanMessage

init_state = {
    "messages": [HumanMessage(content="What are some efficient attackers of 2024-2025 season")]
}

async for s in builder.graph.astream(init_state):
    print("🔁 CURRENT STATE:")
    print(s[list(s.keys())[0]]["messages"][-1].content)

🔁 CURRENT STATE:
To measure the efficiency of attackers, consider the following metrics:
- **Goals per 90 minutes**: The number of goals a player scores, normalized to 90 minutes played.
- **Non-Penalty Goals per 90 minutes**: The number of non-penalty goals a player scores, normalized to 90 minutes played.
- **Assists per 90 minutes**: The number of assists a player provides, normalized to 90 minutes played.
- **Shots on target percentage**: Percentage of shots that are on target.
- **Conversion rate**: Percentage of shots that are converted into goals.


2025-07-17 16:37:18,237 - src.workflow - INFO - {'tool_call_request': 'First, I need to access the database to identify the tables containing player statistics. Call the `get_tables_list()` tool to retrieve the list of available tables.', 'observation': [{}]}
2025-07-17 16:37:18,242 - src.workflow - ERROR - ❌ TOOL: 2 validation errors for HumanMessage
content.str
  Input should be a valid string [type=string_type, input_value={'tool_call_request': 'Fi....', 'observation': [{}]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
content.list[union[str,dict[any,any]]]
  Input should be a valid list [type=list_type, input_value={'tool_call_request': 'Fi....', 'observation': [{}]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/list_type


🔁 CURRENT STATE:
{
  "tool_call_request": "First, I need to access the database to identify the tables containing player statistics. Call the `get_tables_list()` tool to retrieve the list of available tables.",
  "observation": [
    {}
  ]
}


ValueError: ❌ 2 validation errors for HumanMessage
content.str
  Input should be a valid string [type=string_type, input_value={'tool_call_request': 'Fi....', 'observation': [{}]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
content.list[union[str,dict[any,any]]]
  Input should be a valid list [type=list_type, input_value={'tool_call_request': 'Fi....', 'observation': [{}]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/list_type

In [None]:
def show_graph(graph, xray=False):
    """Display a LangGraph mermaid diagram with fallback rendering.

    The default ``draw_mermaid_png()`` renderer is tried first. If it raises
    for any reason (e.g. a timeout from mermaid.ink), the diagram is rendered
    again with the pyppeteer draw method.

    Args:
        graph: The LangGraph object that has a get_graph() method.
        xray: Passed through to ``graph.get_graph(xray=...)`` on both the
            default and the fallback rendering path.

    Returns:
        An ``IPython.display.Image`` wrapping the rendered PNG.

    Raises:
        Whatever the pyppeteer fallback raises if it fails as well.
    """
    from IPython.display import Image

    try:
        # Try the default renderer first.
        return Image(graph.get_graph(xray=xray).draw_mermaid_png())
    except Exception:
        # Deliberately broad: any failure of the default renderer triggers the
        # local pyppeteer fallback.
        import nest_asyncio

        # Presumably needed because the notebook kernel already runs an event
        # loop that the pyppeteer renderer has to nest inside.
        nest_asyncio.apply()
        from langchain_core.runnables.graph import MermaidDrawMethod

        # Forward xray here too, so the fallback draws the same graph the
        # caller asked for.
        return Image(
            graph.get_graph(xray=xray).draw_mermaid_png(
                draw_method=MermaidDrawMethod.PYPPETEER
            )
        )

In [None]:
# Render the workflow graph; xray is spelled out at its default value.
show_graph(builder.graph, xray=False)

In [None]:
question = "Who is the best all-round footballer of 2024-2025?"
init_state = {"messages": [{"role": "user", "content": question}]}

# 1) plain / non-stream run
state = await builder.graph.ainvoke(init_state)      # <- this returns a dict
print(state["messages"][-1])                          # last AIMessage
print(state["messages"][-1].tool_calls)   

In [None]:
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
# Three toy 3-dimensional vectors, one per row.
X = [
    [0, 0, 0],
    [1, 1, 1],
    [0.5, 1, 0.5],
]

# Pairwise cosine similarity between the rows of X.
sim_matrix = cosine_similarity(X)

In [None]:
# One label per row of the similarity matrix.
player_names = [
    "mot",
    "hai",
    "ba",
]

In [None]:
# Label both axes of the similarity matrix with the player names, so a cell is
# addressed as df_similar_scores.loc[name_a, name_b].
df_similar_scores = pd.DataFrame(
    data=sim_matrix,
    index=player_names,
    columns=player_names,
)

In [None]:
# Display the labelled similarity matrix as the cell output.
df_similar_scores

In [None]:
# Left-join the "2018_2019" table with "possession_2018_2019" and
# "gca_2018_2019" on (Player, Squad, Nation).
# NOTE(review): the aliases end in "17" although every table is 2018_2019.
query = '''
SELECT *
FROM "2018_2019" AS st17
LEFT JOIN "possession_2018_2019" AS ps17
  ON st17.Player = ps17.Player AND st17.Squad = ps17.Squad AND st17.Nation = ps17.Nation
LEFT JOIN "gca_2018_2019" AS gca17
  ON st17.Player = gca17.Player AND st17.Squad = gca17.Squad AND st17.Nation = gca17.Nation
'''

# NOTE(review): `conn` is not defined in any visible cell. It has to be an open
# database connection created before this cell runs.
df = pd.read_sql_query(sql=query, con=conn)


In [None]:
# Keep only the first occurrence of every column name; later columns that
# repeat an earlier name are dropped.
repeated_column = df.columns.duplicated()
df = df.loc[:, ~repeated_column]

In [None]:
# Show the column names that remain on df.
df.columns

In [None]:
# List the distinct values found in the 'Pos' column.
df.loc[:, 'Pos'].unique()

In [None]:
# Positions to keep. The previous version of this filter ended in a dangling
# `|` (a SyntaxError); only the 'MF,FW' condition was actually written, so that
# is the one preserved here. Add further 'Pos' values to this list to widen
# the selection.
POSITIONS_TO_KEEP = ['MF,FW']

# .copy() gives df its own data, so later cells can add columns to it without
# pandas raising SettingWithCopyWarning.
df = df[df['Pos'].isin(POSITIONS_TO_KEEP)].copy()

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Columns of df that are fed into the PCA.
features = [
    'npxG+xAG_90',
    'Att 3rd',
    'Succ',
    'Succ%',
    'Carries',
    'PrgDist',
    'SCA90',
    'GCA90',
    'PassLive_90',
]
X_values = df[features].to_numpy()

# Standardise every feature before the projection.
scaler = StandardScaler().fit(X_values)
scaled_values = scaler.transform(X_values)

# Project the standardised features onto two principal components.
pca_converter = PCA(n_components=2)
principal_components = pca_converter.fit_transform(scaled_values)

# NOTE(review): pca_df gets a fresh 0..n-1 index rather than df's index, so
# rows line up with df by position only.
pca_df = pd.DataFrame(data=principal_components, columns=['PCA1', 'PCA2'])

In [None]:
# Keep the points whose score on either component reaches this fraction of
# that component's maximum.
PCA_KEEP_FRACTION = 0.65

# Both cut-offs are taken from pca_df. The previous version read df['PCA1'],
# a column that only exists on pca_df, and so raised KeyError.
pca1_cutoff = pca_df['PCA1'].max() * PCA_KEEP_FRACTION
pca2_cutoff = pca_df['PCA2'].max() * PCA_KEEP_FRACTION

# NOTE: this rebinds pca_df, so re-running the cell filters the already
# filtered frame again.
pca_df = pca_df[
    (pca_df['PCA1'] >= pca1_cutoff) | (pca_df['PCA2'] >= pca2_cutoff)
]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def _min_max(series):
    """Scale a numeric Series linearly onto [0, 1].

    A Series whose non-missing values are all equal maps to 0.0 instead of
    the NaN that a 0/0 division would produce. Missing values stay missing.
    """
    low = series.min()
    span = series.max() - low
    if span == 0:
        return (series - low).astype(float)
    return (series - low) / span


# Min-Max normalization. assign() builds a new frame instead of writing
# columns onto a filtered slice, which avoids SettingWithCopyWarning.
df = df.assign(
    xAG_norm=_min_max(df['xAG']),
    PrgC_norm=_min_max(df['PrgC']),
)

fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(data=df, x='xAG_norm', y='PrgC_norm', ax=ax)

# Write each player's name just below its point.
for x_pos, y_pos, player in zip(df['xAG_norm'], df['PrgC_norm'], df['Player']):
    ax.text(x=x_pos, y=y_pos - 0.03, s=player, fontsize=8, ha='center', va='center')

ax.set_xlabel('xAG (normalized)')
ax.set_ylabel('PrgC (normalized)')
ax.set_title('Normalized Scatterplot of xAG vs PrgC')
plt.show()