In [25]:
import pandas as pd
import random

# Number of synthetic respondents to generate.
num_records = 1000

# Seed the generator so that "Restart Kernel -> Run All" reproduces the same
# sample, and therefore the same weights in the cells that follow.
SEED = 42
random.seed(SEED)

# Category labels for each weighting dimension.
genders = ["Male", "Female"]
age_groups = ["18-34", "35-54", "55+"]
income_levels = ["low", "medium", "high"]

# Draw every column independently and uniformly from its labels.
# The column dict has its own name so that `data` only ever refers to the
# DataFrame (it used to be a dict first and a DataFrame afterwards).
synthetic_columns = {
    "Gender": random.choices(genders, k=num_records),
    "Age_groups": random.choices(age_groups, k=num_records),
    "Income": random.choices(income_levels, k=num_records),
}

data = pd.DataFrame(synthetic_columns)
data.head()

Unnamed: 0,Gender,Age_groups,Income
0,Male,35-54,low
1,Female,55+,low
2,Male,18-34,medium
3,Female,18-34,low
4,Male,18-34,low


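OR — instead of generating random data, load an existing CSV file (uncomment one of the `read_csv` lines in the next cell):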

In [26]:
# Alternative data source: uncomment the import and exactly ONE of the
# read_csv lines below to load respondents from a CSV file instead of using
# the randomly generated DataFrame. Both lines assign to `data`, so if both
# are enabled the second one wins.
# NOTE(review): the paths use Windows-style backslashes, and the second file
# name ends in ".csv.csv" — possibly a typo; confirm the real file name.
# import pandas as pd
# data = pd.read_csv(r'examples\data-with-pre_weight.csv')
# data = pd.read_csv(r'examples\data_with_weight_by_Dimensions.csv.csv')

In [27]:
# Target share of each category, keyed by the column it applies to.
# The shares within every dimension add up to 1.
spec = dict(
    Gender=dict(Male=0.5, Female=0.5),
    Age_groups={"18-34": 0.4, "35-54": 0.4, "55+": 0.2},
    Income=dict(low=0.33, medium=0.34, high=0.33),
)

In [28]:
from rim_weighting.rim_pandas import RIMWeightingPandas

# Set up the RIM weighting object from the respondent frame and the target
# spec. pre_weight=None presumably means "no existing weight column to start
# from" — confirm against the rim_weighting documentation.
rim = RIMWeightingPandas(data=data, spec=spec, pre_weight=None)

# Tuning values for the weighting run: an iteration cap plus lower/upper
# limits passed as min_weight / max_weight (assumed to bound the individual
# weights — TODO confirm).
weighting_options = {
    "max_iterations": 10,
    "min_weight": 0.5,
    "max_weight": 1.5,
}

# Run the weighting and keep the returned result for export.
weighted_data = rim.apply_weights(**weighting_options)

Iteration 1: RMS Error = 1.981048, Efficiency = 91.23%, Max Weight = 1.4220, Min Weight = 0.5000
Iteration 2: RMS Error = 0.186636, Efficiency = 91.15%, Max Weight = 1.4272, Min Weight = 0.5000
Iteration 3: RMS Error = 0.048550, Efficiency = 91.13%, Max Weight = 1.4277, Min Weight = 0.5000
Iteration 4: RMS Error = 0.012420, Efficiency = 91.13%, Max Weight = 1.4278, Min Weight = 0.5000
Iteration 5: RMS Error = 0.003184, Efficiency = 91.13%, Max Weight = 1.4279, Min Weight = 0.5000
Iteration 6: RMS Error = 0.000816, Efficiency = 91.13%, Max Weight = 1.4279, Min Weight = 0.5000
✅ Converged by RMS error < 0.001 in 6 iterations.


In [29]:
# Show one summary table per weighting dimension: unweighted vs. weighted
# counts and percentages, plus the min/max weight within each category.
rim.generate_summary()

|    | Gender   |   Unweighted Count |   Unweighted % |   Weighted_Count |   Min_Weight |   Max_Weight |   Weighted % |
|----|----------|--------------------|----------------|------------------|--------------|--------------|--------------|
|  0 | Female   |                541 |        54.1000 |         500.0008 |       0.5000 |       1.1997 |      50.0000 |
|  1 | Male     |                459 |        45.9000 |         499.9999 |       0.5886 |       1.4279 |      50.0000 |


|    | Age_groups   |   Unweighted Count |   Unweighted % |   Weighted_Count |   Min_Weight |   Max_Weight |   Weighted % |
|----|--------------|--------------------|----------------|------------------|--------------|--------------|--------------|
|  0 | 18-34        |                333 |        33.3000 |         399.9999 |       1.0028 |       1.4073 |      40.0000 |
|  1 | 35-54        |                330 |        33.0000 |         400.0001 |       1.0175 |       1.4279 |      40.0000 |
|  2 | 55+          | 

In [30]:
# Write the weighted result to data.csv (a relative path, so the file lands in
# the current working directory).
# NOTE(review): assuming weighted_data is a pandas DataFrame, to_csv also
# writes the index as the first column by default; pass index=False if that
# column is not wanted — confirm with whoever consumes the file.
weighted_data.to_csv('data.csv')