In [1]:
import pandas as pd
import random
import uuid 

# Number of synthetic respondents to generate.
num_records = 1000

# Fixed seed so that Restart Kernel -> Run All reproduces the same respondents
# (and therefore the same weights in the cells below).
RANDOM_SEED = 42
rng = random.Random(RANDOM_SEED)

# Category labels for each demographic column. These are the same labels used
# as keys in the weighting targets (`spec`) further down.
genders = ["Male", "Female"]
age_groups = ["18-34", "35-54", "55+"]
income_levels = ["low", "medium", "high"]

# Build the columns under their own name so that `data` only ever refers to
# the DataFrame, never to the intermediate dict.
columns = {
    # uuid.uuid4() takes its randomness from the OS and cannot be seeded, so
    # the version-4 UUIDs are derived from the seeded generator instead.
    "Respondentkey": [
        uuid.UUID(int=rng.getrandbits(128), version=4) for _ in range(num_records)
    ],
    "Gender": [rng.choice(genders) for _ in range(num_records)],
    "Age_groups": [rng.choice(age_groups) for _ in range(num_records)],
    "Income": [rng.choice(income_levels) for _ in range(num_records)],
}

data = pd.DataFrame(columns)
data.head()

Unnamed: 0,Respondentkey,Gender,Age_groups,Income
0,f34bf917-df05-4107-a4a5-0d8b1af50f1d,Male,18-34,high
1,e3b5161e-9d48-4d77-9acc-519ca4569a84,Male,18-34,low
2,2c119816-eb85-4c63-aafb-d03ff72f1a4f,Male,18-34,low
3,7965f775-f56d-451a-92c2-9c3e0406d923,Male,55+,high
4,f4add545-0275-474a-b4ea-052fef69c278,Male,18-34,high


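OR, instead of generating random records, load an existing dataset by uncommenting one of the `read_csv` lines in the next cell: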

In [2]:
# Alternative data source: load an example CSV into `data` instead of using
# the randomly generated records. Uncomment the import and ONE read_csv line.
# NOTE(review): the second path ends in `.csv.csv` — confirm this matches the
# file on disk. The backslash separators are Windows-specific.
# import pandas as pd
# data = pd.read_csv(r'examples\data-with-pre_weight.csv')
# data = pd.read_csv(r'examples\data_with_weight_by_Dimensions.csv.csv')

In [3]:
# Weighting targets: the desired share of each category, per variable.
gender_targets = {"Male": 0.5, "Female": 0.5}
age_group_targets = {"18-34": 0.4, "35-54": 0.4, "55+": 0.2}
income_targets = {"low": 0.33, "medium": 0.34, "high": 0.33}

# Keyed by the name of the `data` column each set of targets applies to.
spec = {
    "Gender": gender_targets,
    "Age_groups": age_group_targets,
    "Income": income_targets,
}

In [4]:
from rim_weighting.rim_pandas import RIMWeightingPandas

# Settings passed to `apply_weights`, gathered in one place for easy tuning.
# NOTE(review): min_weight / max_weight presumably bound the resulting
# weights — confirm against the rim_weighting documentation.
weighting_options = dict(
    max_iterations=10,
    min_weight=0.5,
    max_weight=1.5,
)

# Set up the weighter on the respondent data and the targets in `spec`.
# No pre-weight is supplied; rows are identified by the 'Respondentkey' column.
rim = RIMWeightingPandas(data=data, spec=spec, pre_weight=None, id='Respondentkey')

# Run the weighting. In the recorded run this printed per-iteration
# diagnostics (RMS error, efficiency, min/max weight).
weighted_data = rim.apply_weights(**weighting_options)

Iteration 1: RMS Error = 7.252160, Efficiency = 89.56%, Max Weight = 1.4150, Min Weight = 0.5329
Iteration 2: RMS Error = 0.013888, Efficiency = 89.57%, Max Weight = 1.4136, Min Weight = 0.5333
Iteration 3: RMS Error = 0.000023, Efficiency = 89.57%, Max Weight = 1.4137, Min Weight = 0.5333
✅ Converged by `RMS error < 0.005` in 3 iterations.


In [5]:
# Display the weighting summary. In the recorded run this rendered tables per
# weighting variable comparing unweighted and weighted counts and percentages,
# along with the min/max weight in each category.
rim.generate_summary()

|    | Gender   |   Unweighted Count |   Unweighted % |   Weighted_Count |   Min_Weight |   Max_Weight |   Weighted % |
|----|----------|--------------------|----------------|------------------|--------------|--------------|--------------|
|  0 | Female   |                505 |        50.5000 |         500.0000 |       0.5416 |       1.4137 |      50.0000 |
|  1 | Male     |                495 |        49.5000 |         500.0000 |       0.5333 |       1.3918 |      50.0000 |


|    | Age_groups   |   Unweighted Count |   Unweighted % |   Weighted_Count |   Min_Weight |   Max_Weight |   Weighted % |
|----|--------------|--------------------|----------------|------------------|--------------|--------------|--------------|
|  0 | 18-34        |                300 |        30.0000 |         400.0000 |       1.2808 |       1.4137 |      40.0000 |
|  1 | 35-54        |                340 |        34.0000 |         400.0000 |       1.1292 |       1.2463 |      40.0000 |
|  2 | 55+          | 

In [6]:
# Save the weighted data to `data.csv`, relative to the current working directory.
# NOTE(review): assuming `weighted_data` is a pandas DataFrame, to_csv also
# writes the index by default — confirm whether that is wanted.
weighted_data.to_csv('data.csv')

In [7]:
# Show the weight assigned to each respondent. In the recorded run this
# returned a `rim_weight` column indexed by Respondentkey.
rim.get_weighted_factors()

Unnamed: 0_level_0,rim_weight
Respondentkey,Unnamed: 1_level_1
f34bf917-df05-4107-a4a5-0d8b1af50f1d,1.391786
e3b5161e-9d48-4d77-9acc-519ca4569a84,1.280830
2c119816-eb85-4c63-aafb-d03ff72f1a4f,1.280830
7965f775-f56d-451a-92c2-9c3e0406d923,0.579456
f4add545-0275-474a-b4ea-052fef69c278,1.391786
...,...
99fd03ce-e435-4f6d-98cc-c3123e64e668,0.588562
5c3ce8e3-ae13-4c56-9ba2-e3ea2ff3ce58,1.391786
7346c81b-0361-47b8-9238-ea526b0bc49b,1.167732
0b73e28e-7d7f-446c-b0d6-98fc877c80ab,1.300957
