In [7]:
%pip install -U langchain-openai langchain-core pulp pandas langchain-groq pulp pandas

Collecting langchain-groq
  Downloading langchain_groq-1.1.2-py3-none-any.whl.metadata (2.4 kB)
Collecting groq<1.0.0,>=0.30.0 (from langchain-groq)
  Downloading groq-0.37.1-py3-none-any.whl.metadata (16 kB)
Downloading langchain_groq-1.1.2-py3-none-any.whl (19 kB)
Downloading groq-0.37.1-py3-none-any.whl (137 kB)
Installing collected packages: groq, langchain-groq

  Attempting uninstall: groq

    Found existing installation: groq 1.0.0

    Uninstalling groq-1.0.0:

      Successfully uninstalled groq-1.0.0

   ---------------------------------------- 0/2 [groq]
   ---------------------------------------- 0/2 [groq]
   ---------------------------------------- 0/2 [groq]
   ---------------------------------------- 0/2 [groq]
   ---------------------------------------- 0/2 [groq]
   ---------------------------------------- 0/2 [groq]
   ---------------------------------------- 0/2 [groq]
   ---------------------------------------- 0/2 [groq]
   ---------------------------------------

In [8]:
import pulp
import pandas as pd
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field

# --- 1. CONFIGURATION ---
# Get your free key at https://console.groq.com/
os.environ["GROQ_API_KEY"] = "gsk_your_key_here"

# --- 2. DATA ENGINEERING ---
data = {
    "Consultant": ["Arjun", "Sara", "Chen", "Elena"],
    "Skill": ["NLP", "Azure Cloud", "Optimization", "NLP"],
    "Hourly_Rate": [150, 120, 200, 140],
    "Availability_Hours": [40, 20, 30, 40]
}
df_resources = pd.DataFrame(data)

# --- 3. GEN-AI LAYER (Using Llama 3 via Groq) ---
class ProjectNeeds(BaseModel):
    required_skill: str = Field(description="The technical skill required")
    estimated_hours: int = Field(description="The hours the task will take")

def run_ai_pipeline(user_query):
    try:
        # EY JD mentions "Hugging Face" and "LLMs" - Llama 3 is the top choice here
        llm = ChatGroq(
            model_name="llama3-70b-8192",
            temperature=0
        )
        
        parser = JsonOutputParser(pydantic_object=ProjectNeeds)
        prompt = ChatPromptTemplate.from_template(
            "You are an EY Resource Manager. Analyze this project request: {query}\n\n{format_instructions}"
        )
        
        chain = prompt | llm | parser
        return chain.invoke({
            "query": user_query,
            "format_instructions": parser.get_format_instructions()
        })
    except Exception as e:
        print(f"AI Error: {e}")
        return None

# --- 4. OPTIMIZATION LAYER (MIP) ---
def solve_allocation(extracted_data):
    if not extracted_data: return "No data to optimize."
    
    skill = extracted_data['required_skill']
    hours = extracted_data['estimated_hours']
    
    # Filtering
    mask = (df_resources['Skill'] == skill) & (df_resources['Availability_Hours'] >= hours)
    qualified = df_resources[mask].copy()
    
    if qualified.empty:
        return f"No consultant available for {skill} for {hours} hours."

    # Solve
    prob = pulp.LpProblem("Resource_Optimization", pulp.LpMinimize)
    names = qualified['Consultant'].tolist()
    x = pulp.LpVariable.dicts("assign", names, cat=pulp.LpBinary)
    
    prob += pulp.lpSum([x[n] * qualified.loc[qualified['Consultant'] == n, 'Hourly_Rate'].values[0] * hours for n in names])
    prob += pulp.lpSum([x[n] for n in names]) == 1
    
    prob.solve(pulp.PULP_CBC_CMD(msg=0))
    
    for n in names:
        if pulp.value(x[n]) == 1:
            return {"Consultant": n, "Total_Cost": f"${pulp.value(prob.objective)}", "Status": "Optimal"}

# --- 5. EXECUTION ---
query = "We need a consultant for an NLP project. It should take about 25 hours."
needs = run_ai_pipeline(query)

if needs:
    print("--- AI Extracted Needs ---")
    print(needs)
    result = solve_allocation(needs)
    print("\n--- Optimized Assignment ---")
    print(result)

AI Error: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}


In [9]:
import pandas as pd
import random

# EY JD Requirement: Data Engineering - Curating and preprocessing datasets
def generate_big_dataset(rows=100):
    skills = ["NLP", "Azure Cloud", "Optimization", "Computer Vision", "Data Engineering", "MLOps"]
    names = [f"Consultant_{i}" for i in range(rows)]
    
    data = {
        "Consultant": names,
        "Skill": [random.choice(skills) for _ in range(rows)],
        "Hourly_Rate": [random.randint(80, 250) for _ in range(rows)],
        "Availability_Hours": [random.randint(5, 45) for _ in range(rows)]
    }
    df = pd.DataFrame(data)
    df.to_csv("ey_resource_pool.csv", index=False)
    print(f"Created ey_resource_pool.csv with {rows} rows.")

generate_big_dataset(2000)

Created ey_resource_pool.csv with 2000 rows.
