In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the truncated career-recommender CSV into a pandas DataFrame.
data = pd.read_csv(filepath_or_buffer="/content/truncated_career_recommender_dataset.csv")

In [3]:
# Preview the first five rows (the default for head) to check columns and values.
data.head(n=5)

Unnamed: 0,Undergraduate Course,UG Specialization,Interests,Skills,UG CGPA/Percentage,Certifications,Employment Status,Career Path
0,B.Sc,Computer Applications,Cloud computing,Python;SQL;Java,85.0,"Linux,Git",Yes,Software Developer
1,B.E,Computer Science Engineering,Technology,"Critical Thinking, Analytic Thinking, SQL, Pro...",66.5,Microsoft certification,Yes,Software Developer
2,BA,Psychology,Understand human behaviour,People management;Communication skills,64.6,Resilience psychology,No,Psychologist
3,MBA,Commerce,Sales/Marketing;Trading;Understand human behav...,Accounting Skills;Critical Thinking,75.525,No,Yes,Accountant
4,B.Tech,Instrumentation Engineering,Technology,\nPLC Allen Bradley;PLC Ladder Logic;LabVIEW;B...,70.68,Extreme Productivity (Blinkist Summary),Yes,Engineer


In [4]:
!pip install gradientai --upgrade

Collecting gradientai
  Downloading gradientai-1.8.0-py3-none-any.whl (296 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 296.5/296.5 kB 5.7 MB/s eta 0:00:00
Collecting aenum>=3.1.11 (from gradientai)
  Downloading aenum-3.1.15-py3-none-any.whl (137 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 137.6/137.6 kB 12.5 MB/s eta 0:00:00
Collecting pydantic<2.0.0,>=1.10.5 (from gradientai)
  Downloading pydantic-1.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 15.6 MB/s eta 0:00:00
Installing collected packages: aenum, pydantic, gradientai
  Attempting uninstall: pydantic
    Found existing installation: pydantic 2.6.3
    Uninstalling pydantic-2.6.3:
      Successfully uninstalled pydantic-2.6.3
Successfully installed aenum-3.1.15 gradientai-1.8.0 pydantic-1.10.14


In [5]:
import csv
import getpass
import os

from gradientai import Gradient

In [6]:
from functools import total_ordering
# Set the Gradient environment variables.
# SECURITY: the access token is a secret and must not be stored in the notebook.
# It is taken from the environment when already set; otherwise it is requested
# interactively without being echoed. The token that used to be hardcoded here
# should be treated as leaked and revoked.
if not os.environ.get('GRADIENT_ACCESS_TOKEN'):
  os.environ['GRADIENT_ACCESS_TOKEN'] = getpass.getpass("Gradient access token: ")
# The original workspace ID is kept as a default, but a value already present in
# the environment now takes precedence.
# NOTE(review): move this to external configuration too if it is considered sensitive.
os.environ.setdefault('GRADIENT_WORKSPACE_ID', "4516658f-2630-4114-af57-6e73c033ab58_workspace")

# Defining the dataset path
carrer_dataset_path = "/content/truncated_career_recommender_dataset.csv"

# Initialize the Gradient client. It is constructed without arguments, so it is
# expected to read its credentials from the environment variables set above.
gradient = Gradient()

# Loading the dataset and turning every row into one fine-tuning sample
print("Loading the data...")
formatted_data = []
# newline='' is what the csv module requires for file objects; without it,
# quoted fields that contain line breaks may be parsed incorrectly.
with open(carrer_dataset_path, encoding='utf-8-sig', newline='') as f:
  dataset_data = csv.DictReader(f, delimiter=",")
  for row in dataset_data:
    # The response is the career path; a row without one carries no label to
    # learn from, so it is skipped instead of being sent as an empty sample.
    carrer_response = row['Career Path']
    if not carrer_response or not carrer_response.strip():
      continue
    # Constructing the prompt from the user data
    user_data = f"Interests: {row['Interests']}, Skills: {row['Skills']}, Degree: {row['Undergraduate Course']}, Working: {row['Employment Status']}"
    # Format the data for fine-tuning. The template text (including its exact
    # spelling) must stay identical to the prompt used when querying the
    # adapter, so it is intentionally left unchanged.
    formatted_entry = {
        "inputs": f"### User Data:\n{user_data}\n\n### Suggested Carrer Path:",
        "response": carrer_response
    }
    formatted_data.append(formatted_entry)

# Getting the base model from Gradient and creating a new adapter on top of it
base = gradient.get_base_model(base_model_slug="nous-hermes2")
new_model_adapter = base.create_model_adapter(name="ai_carrer_chatbot")

# Fine tuning the model adapter in chunks to prevent memory issues
print("Fine Tuning the model....")
chunck_lines = 20  # number of samples sent per fine_tune call
total_chunks = [formatted_data[x:x+chunck_lines] for x in range(0,len(formatted_data), chunck_lines)]
failed_chunks = []  # 1-based numbers of the chunks whose fine_tune call raised
for i , chunck in enumerate(total_chunks):
  try:
    print(f"Fine Tuning chunck {i+1} of {len(total_chunks)}")
    new_model_adapter.fine_tune(samples=chunck)
  except Exception as error:
    # Best effort: one failing chunk does not abort the remaining ones, but it
    # is recorded so the failure cannot go unnoticed in the log.
    print(f"Error in fine tuing chunck {i+1}: {error}")
    failed_chunks.append(i + 1)

# Make partial training explicit instead of leaving it buried in the loop output.
if failed_chunks:
  print(f"Warning: {len(failed_chunks)} of {len(total_chunks)} chunks failed to fine-tune: {failed_chunks}")

Loading the data...
Fine Tuning the model....
Fine Tuning chunck 1 of 5
Fine Tuning chunck 2 of 5
Fine Tuning chunck 3 of 5
Fine Tuning chunck 4 of 5
Fine Tuning chunck 5 of 5


In [7]:
# Profile to get a recommendation for, written in the "key: value" layout.
user_query = "Interests: Research, Skills: Python, Degree: B.Sc in Commerce, Working: No"

# Wrap the profile in the prompt template expected by the adapter.
formatted_query = f"### User Data:\n{user_query}\n\n### Suggested Carrer Path:"

# Request a completion from the adapter, passing max_generated_token_count=50.
response = new_model_adapter.complete(
    query=formatted_query,
    max_generated_token_count=50,
)

# Show the query followed by the text the model generated for it.
print(f"> {user_query}\n {response.generated_output}")

> Interests: Research, Skills: Python, Degree: B.Sc in Commerce, Working: No
  Research Analyst, Data Analyst, Data Scientist, Machine Learning Engineer, Business Analyst, Financial Analyst, Investment Analyst, Risk Analyst, Quantitative Analyst, Financial Risk Manager,
