In [1]:
import pandas as pd
import numpy as np

def generate_mixed_dataset(n_samples=1000, seed=42):
    """
    Generates a pandas DataFrame with random numerical and categorical data.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of rows to generate.
    seed : int or None, default 42
        Seed for the private random generator used by this function. The
        default yields the same data on every call; pass ``None`` to get
        different data on each call.

    Returns
    -------
    pandas.DataFrame
        A frame with ``n_samples`` rows and three columns:

        * ``Experience`` - integers in [0, 100], drawn from a normal
          distribution (mean 45, standard deviation 15), truncated to
          integers and then clipped.
        * ``Department_Skill`` - unordered categorical drawn from
          HR / Engineering / Sales / Marketing / Finance with
          probabilities 0.1 / 0.4 / 0.2 / 0.1 / 0.2.
        * ``Rating`` - ordered categorical
          (ToBeTrained < Trained < CentreOfExcellence) drawn with
          probabilities 0.2 / 0.5 / 0.3.
    """
    # Use a private RandomState instead of np.random.seed(): reseeding the
    # global generator on every call would silently reset the random state of
    # all other code in the process. RandomState(seed) produces the same legacy
    # stream as np.random.seed(seed), so the default output is unchanged.
    rng = np.random.RandomState(seed)

    # 1. Generate Numerical Data (e.g., Experience following a normal distribution)
    # astype(int) truncates toward zero before the values are clipped.
    experiences = rng.normal(loc=45, scale=15, size=n_samples).astype(int)
    # Keep experiences within the non-negative range [0, 100]
    experiences = np.clip(experiences, 0, 100)

    # 2. Generate Categorical Data (e.g., 'department' and 'rating')
    departments_skillwise = ['HR', 'Engineering', 'Sales', 'Marketing', 'Finance']
    # 'p' gives each department its own sampling probability
    dept_data = rng.choice(departments_skillwise, size=n_samples, p=[0.1, 0.4, 0.2, 0.1, 0.2])

    # Listed from lowest to highest; the same list defines the category order below
    ratings = ['ToBeTrained', 'Trained', 'CentreOfExcellence']
    rating_data = rng.choice(ratings, size=n_samples, p=[0.2, 0.5, 0.3])

    # 3. Assemble into a pandas DataFrame
    df = pd.DataFrame({
        'Experience': experiences,
        'Department_Skill': dept_data,
        'Rating': rating_data
    })

    # Convert the columns to proper 'category' dtypes in pandas
    df['Department_Skill'] = df['Department_Skill'].astype('category')
    # Ordered, so comparisons such as df['Rating'] >= 'Trained' are meaningful
    df['Rating'] = pd.Categorical(df['Rating'], categories=ratings, ordered=True)

    return df

# Build the sample dataset, then show a five-row preview followed by the
# dtype of every column.
dataset = generate_mixed_dataset()
print(dataset.head(5))
print("\nData Types:", dataset.dtypes, sep="\n")

   Experience Department_Skill              Rating
0          52      Engineering             Trained
1          42      Engineering         ToBeTrained
2          54            Sales         ToBeTrained
3          67        Marketing             Trained
4          41               HR  CentreOfExcellence

Data Types:
Experience             int32
Department_Skill    category
Rating              category
dtype: object
