In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the later cells open their data files through bare relative paths
# (e.g. "Jobopportunities.xlsx"), not paths under /content/drive — confirm the files
# are actually reachable from the working directory, or that this mount is needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import random

# Load the dataset from the Excel workbook (the input here is .xlsx, not a CSV)
file_path = "Jobopportunities.xlsx"
df = pd.read_excel(io=file_path)

# Retain just the two columns used downstream, as an independent copy so that
# adding columns later does not touch (or warn about) the original frame
df_filtered = df.loc[:, ['Job Title', 'Company']].copy()

# Build the sentence stored in the 'Text' column for one row
def generate_text(row):
    """Return one randomly chosen job-advert sentence for a single row.

    Parameters
    ----------
    row : mapping
        Must provide the keys "Job Title" and "Company".

    Returns
    -------
    str
        One of six fixed sentence patterns with the row's job title and
        company substituted in; the pattern is picked with ``random.choice``.
    """
    fields = {
        "job_title": row["Job Title"],
        "company_name": row["Company"],
    }

    # Sentence patterns; placeholders are filled in only for the one that gets picked.
    patterns = [
        "{job_title} needed at {company_name}! Join our dynamic team to achieve great things together.",
        "Exciting opportunity: {job_title} position open at {company_name}. Seize your chance today!",
        "{company_name} is seeking a passionate {job_title} to innovate and grow with us.",
        "Are you a skilled {job_title}? {company_name} wants you on board! Apply now and be part of our journey.",
        "{company_name} has an urgent opening for a {job_title}. Shape the future with us—apply today!",
        "Looking for your next challenge? {company_name} is hiring a talented {job_title}. Don't miss out!",
    ]
    picked = random.choice(patterns)
    return picked.format(**fields)

# Seed the RNG first: generate_text picks its sentence with random.choice, so without
# a fixed seed the 'Text' column (and the CSV written below) changes on every run.
random.seed(42)
df_filtered['Text'] = df_filtered.apply(generate_text, axis=1)


# Save the result to a new semicolon-separated CSV file
output_path = "Finalfiltered_job_data_complex.csv"
df_filtered.to_csv(output_path, index=False, sep=';')

# Show a preview of the first rows
print(df_filtered.head())


               Job Title            Company  \
0      Software Engineer           ABC Tech   
1           Data Analyst      XYZ Analytics   
2       Network Engineer  Network Solutions   
3        Cloud Architect   Cloud Innovators   
4  Cybersecurity Analyst        SecureGuard   

                                                Text  
0  Are you a skilled Software Engineer? ABC Tech ...  
1  Data Analyst needed at XYZ Analytics! Join our...  
2  Network Engineer needed at Network Solutions! ...  
3  Cloud Architect needed at Cloud Innovators! Jo...  
4  Exciting opportunity: Cybersecurity Analyst po...  


In [None]:
import pandas as pd
import random

# Load the dataset from a semicolon-separated CSV file
file_path = "organized_job_data_semicolon.csv"
df = pd.read_csv(file_path, sep=";")

# Retain just the two columns used downstream, as an independent copy so that
# adding columns later does not touch (or warn about) the original frame
df_filtered = df.loc[:, ['Job Title', 'Company Name']].copy()

# Build the sentence stored in the 'Text' column for one row
def generate_text(row):
    """Produce a randomly selected job-advert sentence for one row.

    Parameters
    ----------
    row : mapping
        Must provide the keys "Job Title" and "Company Name".

    Returns
    -------
    str
        One of six fixed sentence patterns, chosen via ``random.choice``, with
        the row's job title and company name filled in.
    """
    title = row["Job Title"]
    company = row["Company Name"]

    # Unfilled sentence patterns; only the selected one is formatted.
    sentence_patterns = [
        "{job_title} needed at {company_name}! Join our dynamic team to achieve great things together.",
        "Exciting opportunity: {job_title} position open at {company_name}. Seize your chance today!",
        "{company_name} is seeking a passionate {job_title} to innovate and grow with us.",
        "Are you a skilled {job_title}? {company_name} wants you on board! Apply now and be part of our journey.",
        "{company_name} has an urgent opening for a {job_title}. Shape the future with us—apply today!",
        "Looking for your next challenge? {company_name} is hiring a talented {job_title}. Don't miss out!",
    ]
    selected = random.choice(sentence_patterns)
    return selected.format(job_title=title, company_name=company)

# Seed the RNG first: generate_text picks its sentence with random.choice, so without
# a fixed seed the 'Text' column (and the CSV written below) changes on every run.
random.seed(42)
df_filtered['Text'] = df_filtered.apply(generate_text, axis=1)



# Save the result to a new semicolon-separated CSV file
output_path = "filtered_job_data_complex_updated.csv"
df_filtered.to_csv(output_path, index=False, sep=';')

# Show a preview of the first rows
print(df_filtered.head())


                       Job Title                   Company Name  \
0   développeur symfony confirmé                       OCTASOFT   
1                  Sales Manager  United HR Solutions Pvt. Ltd.   
2      Human Resources Recruiter      BDNT Labs Private Limited   
3  Back-End Management Executive                 Suprem Service   
4             Back-End Executive                 Suprem Service   

                                                Text  
0  OCTASOFT has an urgent opening for a développe...  
1  Exciting opportunity: Sales Manager position o...  
2  BDNT Labs Private Limited has an urgent openin...  
3  Suprem Service has an urgent opening for a Bac...  
4  Back-End Executive needed at Suprem Service! J...  


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from joblib import Memory
import pickle

# Read the generated dataset and discard any row that lacks one of the three
# fields the training step relies on
data = (
    pd.read_csv("Finalfiltered_job_data_complex.csv", sep=";")
    .dropna(subset=["Job Title", "Company", "Text"])
)

# Keyword arguments forwarded to TfidfVectorizer
vectorizer_params = {
    "stop_words": "english",
    "ngram_range": (1, 2),
    "lowercase": True,
    # Skip terms that occur in fewer than 5 documents
    "min_df": 5,
}

# joblib on-disk cache rooted at ./cache, used when building the vectorizer step
memory = Memory("./cache", verbose=0)

# Helpers to cross-validate, train and persist a TF-IDF + gradient-boosting text classifier
def _build_text_classifier():
    """Return a fresh, unfitted TF-IDF -> GradientBoostingClassifier pipeline."""
    return Pipeline(
        [
            ("vectorizer", TfidfVectorizer(**vectorizer_params)),
            ("classifier", GradientBoostingClassifier(
                n_estimators=100,
                max_depth=5,  # cap tree depth to limit computation
                subsample=0.9,  # each tree is fitted on a random 90% of the training rows
                random_state=42,
            )),
        ],
        # Caching is configured on the Pipeline so the *fitted* vectorizer is memoised.
        # Wrapping the TfidfVectorizer class in memory.cache() only cached the
        # constructor call, which is cheap and gains nothing.
        memory=memory,
    )


def train_and_evaluate_model_cv(X, y, model_name, n_splits=5):
    """Cross-validate a text classifier, then fit and pickle a final model.

    For every stratified fold a new pipeline is fitted on the training part and
    a classification report for the held-out part is printed. Afterwards a new
    pipeline is fitted on all of ``X`` / ``y`` and pickled to disk.

    Parameters
    ----------
    X : array-like of str
        Documents to classify.
    y : array-like
        Class label for each document in ``X``.
    model_name : str
        Label used in the printed messages; lowercased and with spaces replaced
        by underscores it also prefixes the output file name.
    n_splits : int, default=5
        Number of stratified folds.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The pipeline fitted on the complete data (the same object that is
        pickled to ``<prefix>_FFFfinalmodel.pkl``).
    """
    import numpy as np  # local import: the notebook's import cell does not bring numpy in

    # The fold indices are positional. Converting to ndarrays guarantees positional
    # indexing even if a caller passes a pandas Series with a non-default index.
    X = np.asarray(X)
    y = np.asarray(y)

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
        # A new pipeline per fold keeps the folds independent of one another.
        fold_pipeline = _build_text_classifier()
        fold_pipeline.fit(X[train_index], y[train_index])

        y_pred = fold_pipeline.predict(X[test_index])
        print(f"{model_name} Classification Report for fold {fold}:")
        print(classification_report(y[test_index], y_pred, zero_division=0))

    # Final model: built explicitly instead of reusing whatever pipeline object
    # happened to be left over from the last fold, then trained on the whole dataset.
    pipeline = _build_text_classifier()
    pipeline.fit(X, y)

    model_file = f"{model_name.lower().replace(' ', '_')}_FFFfinalmodel.pkl"
    with open(model_file, "wb") as file:
        pickle.dump(pipeline, file)

    print(f"{model_name} final model saved to {model_file}")
    return pipeline

# Fit one classifier per target (job title, company) on the same generated texts;
# each call prints per-fold reports and pickles its final model
job_title_model = train_and_evaluate_model_cv(
    X=data["Text"].values,
    y=data["Job Title"].values,
    model_name="Job Title",
)
company_name_model = train_and_evaluate_model_cv(
    X=data["Text"].values,
    y=data["Company"].values,
    model_name="Company Name",
)




Job Title Classification Report for fold 1:
                                    precision    recall  f1-score   support

              AI Ethics Consultant       1.00      1.00      1.00         2
                    AI/ML Engineer       1.00      1.00      1.00         1
              Blockchain Developer       1.00      1.00      1.00         2
                   Cloud Architect       1.00      1.00      1.00         2
            Cloud Security Analyst       0.00      0.00      0.00         0
           Cloud Security Engineer       1.00      1.00      1.00         1
           Cloud Solutions Analyst       1.00      1.00      1.00         3
         Cloud Solutions Architect       0.00      0.00      0.00         1
            Cloud Solutions Intern       0.00      0.00      0.00         1
            Cloud Support Engineer       1.00      1.00      1.00         1
             Cybersecurity Analyst       1.00      1.00      1.00         1
            Cybersecurity Engineer       1.



Company Name Classification Report for fold 1:
                          precision    recall  f1-score   support

               AI Ethics       1.00      1.00      1.00         2
          AI Innovations       1.00      1.00      1.00         1
          Asset Managers       0.00      0.00      0.00         1
           Audit Experts       1.00      1.00      1.00         1
            Biz Analysts       0.00      0.00      0.00         0
         Blockchain Tech       1.00      1.00      1.00         1
                     CED       1.00      1.00      1.00         2
         Change Managers       0.00      0.00      0.00         0
          Cloud Analysts       1.00      1.00      1.00         3
        Cloud Architects       0.00      0.00      0.00         1
       Cloud Innovations       0.00      0.00      0.00         1
        Cloud Innovators       1.00      1.00      1.00         1
           Cloud Support       1.00      1.00      1.00         2
      Compliance Experts    

In [None]:
# Example input: a raw job posting (mixed French/English) from which the job title
# and company name are to be extracted by the two trained classifiers.
# NOTE(review): this posting is long free text, while the training CSV in this notebook
# is produced from short template sentences — treat the result as a smoke test and
# confirm how well the models generalise to real postings.
new_text = """
Senior Front-end Angular Developer
•	CED
•	Tunis, Tunisie

Postes vacants:
1 poste ouvert
Type d'emploi désiré :
CDI
Experience :
3 à 5 ans
Niveau d'étude :
Ingénieur
Langue :
Français, Anglais
Genre :
Indifférent
Description de l'emploi
CED Group is an European service provider for many insurers, property managers, corporates and governments. Its business consists on protecting and restoring value for the citizens all over 14 European countries. 2000+ employees and experts are serving our customers all over Europe. CED Tunisia is a subsidiary of CED Group. It consists of an IT-BI Development center and a Businees Process Operations (BPO) team to support our various European offices and drive operational excellence. We employ 110 professionals as of now and going to recruit 20 new positions in the coming 3 months, 5 of them in IT. We are building in Tunis the digitalisation of all our front office applications and part of our middle office. Our solutions are in a Full Stack Environment using the most recent Microsoft Technologies: Angular, C#, .Net, SQL Server databases and Azure Cloud Technology.
We are reinforcing our team by recruiting a Senior Front-end Angular Developer who is passionate about design and programming. Responsibilities include implementing visual elements and their behaviors with user interactions. You will work with both front-end and back-end web developers to build all client-side logic. You will also be bridging the gap between the visual elements and the server-side infrastructure, taking an active role on both sides, and defining how the application looks and functions.

"""

# Run both classifiers on the single posting; predict() takes a collection of
# documents, so the text is wrapped in a one-element list for each call
job_title_prediction, company_name_prediction = (
    model.predict([new_text])
    for model in (job_title_model, company_name_model)
)

# Each prediction holds exactly one label, for the one document passed in
print(f"Predicted Job Title: {job_title_prediction[0]}")
print(f"Predicted Company Name: {company_name_prediction[0]}")

Predicted Job Title: Senior Front-end Angular Developer
Predicted Company Name: CED
