### Animal grouping generator
This program assigns experimental animals to groups.

I had a project on feed supplements for swine, and before the experiment I had to divide the piglets into groups. I grouped them randomly at first, but the deviation of the piglets' weights was too large. It would have been a poor design if one of the groups had ended up with the strongest or the weakest piglets, so I spent more than an hour arranging the groups by hand. I had to do the same thing twice for my project, which was a real waste of time.

I decided to write a program for it then, and I did. I hope it is helpful.

In [1]:
import os
import numpy as np
import pandas as pd
import random

# Tolerance factor used by check_std_for_each_group: a grouping is rejected
# when any group's standard deviation exceeds ALLOWED times the standard
# deviation of all the grouped data taken together.
# If the animals' weights vary only slightly, the constant can be lowered below 1.0.
ALLOWED = 1.0

In [2]:
def import_excel(filename: str) -> pd.core.frame.DataFrame:
    """
    Read an Excel workbook into a DataFrame.

    pd.read_excel is called with its default arguments, so the first
    sheet is the one that gets loaded. filename is passed through as
    given; a bare file name is therefore looked up relative to the
    current working directory.
    """
    # join() with a single component hands the name back unchanged
    return pd.read_excel(os.path.join(filename))

In [3]:
def grouping_randomly(array: np.ndarray, group_number: int, max_number_per_group: int) -> np.ndarray:
    """
    Randomly distribute the values of array over group_number groups.

    Every value is placed in a group chosen uniformly at random among the
    groups that still hold fewer than max_number_per_group values.

    Parameters:
        array: one-dimensional collection of values to distribute
        group_number: number of groups to create
        max_number_per_group: capacity of each group

    Returns:
        The list of groups converted with np.array (one row per group).

    Raises:
        ValueError: if array holds more values than the groups can take,
            i.e. len(array) > group_number * max_number_per_group.
            Without this check the search for a free group would never end.

    NOTE: when the values do not fill every group completely the groups can
    end up with different sizes; recent NumPy versions refuse to build an
    array from such a ragged list, so callers should pass a number of values
    equal to group_number * max_number_per_group.
    """
    total = len(array)
    capacity = max(group_number, 0) * max(max_number_per_group, 0)
    if total > capacity:
        raise ValueError(
            f"Cannot place {total} values into {group_number} groups "
            f"of at most {max_number_per_group} each."
        )

    # one empty container per group
    groups = [[] for _ in range(group_number)]
    # indices of the groups that can still accept a value
    open_groups = list(range(group_number))

    for value in array:
        # drawing only among the open groups gives the same distribution as
        # re-drawing until a non-full group comes up, but always terminates
        target = random.choice(open_groups)
        groups[target].append(value)
        if len(groups[target]) >= max_number_per_group:
            open_groups.remove(target)

    return np.array(groups)

In [4]:
def check_std_for_each_group(grouped_data: np.ndarray, group_number: int) -> bool:
    """
    Tell whether a grouping has to be rejected.

    Returns True when at least one of the first group_number groups has a
    standard deviation (std) larger than ALLOWED times the std of the whole
    grouped data, and False when every group stays within that limit.
    """
    # the limit every single group is compared against
    limit = grouped_data.std() * ALLOWED

    # std of each group, computed up front
    group_stds = [grouped_data[index].std() for index in range(group_number)]

    return any(group_std > limit for group_std in group_stds)

In [5]:
class Animal_grouping_generator():
    """
    Split the animals listed in an Excel sheet into groups of similar weight spread.

    The sheet is loaded with import_excel and must contain a "Weight"
    column. Groups are drawn at random and re-drawn until
    check_std_for_each_group accepts them or the retry budget is used up.

    Attributes set by __init__:
        df: the imported sheet
        sample_size: number of rows in df
        weight_ndarray: the "Weight" column as an ndarray
        mean, std: mean and standard deviation (ndarray.std defaults) of the weights
        group_number: number of groups requested
        number_per_group: sample_size // group_number
        max_number_per_group: capacity of a group; number_per_group,
            plus one when the division leaves a remainder
    """

    # how many times a rejected grouping is re-drawn before giving up
    MAX_RETRIES = 100

    def __init__(self, filename: str, group_number: int):
        """
        Load the data and derive the group sizes.

        Raises:
            ValueError: if group_number is smaller than 1.
        """
        if group_number < 1:
            raise ValueError(f"group_number must be at least 1, got {group_number}")

        self.df = import_excel(filename)
        self.sample_size = self.df.shape[0]
        self.weight_ndarray = np.array(self.df["Weight"])
        self.mean = self.weight_ndarray.mean()
        self.std = self.weight_ndarray.std()
        self.group_number = group_number
        self.number_per_group = self.sample_size // self.group_number
        if self.sample_size % self.group_number == 0:
            self.max_number_per_group = self.number_per_group
        else:
            self.max_number_per_group = self.number_per_group + 1

    def grouping(self, ndarray: np.ndarray, max_number: int):
        """
        Draw random groupings of ndarray until one passes the std check.

        At most MAX_RETRIES re-draws follow the first attempt. If the last
        one is still rejected a message is printed and that grouping is
        returned anyway (best effort).
        """
        grouped_ndarray = grouping_randomly(ndarray, self.group_number, max_number)
        retries = 0
        while check_std_for_each_group(grouped_ndarray, self.group_number):
            if retries >= self.MAX_RETRIES:
                # report a failure only when the final attempt was rejected too
                print(f"Tried {retries} times.\nThe deviation of the data is too big. Please try again or adjust the constant.\n")
                break
            grouped_ndarray = grouping_randomly(ndarray, self.group_number, max_number)
            retries += 1

        return grouped_ndarray

    def convert_ndarray_to_df(self, grouped_ndarray):
        """
        Turn the grouped data into a DataFrame with one column per group.

        Row i of grouped_ndarray becomes the column "Group i+1".
        """
        return pd.DataFrame(
            {f"Group {i+1}": grouped_ndarray[i] for i in range(self.group_number)}
        )

    def get_grouped_data(self):
        """Group all weights and return the result as a DataFrame."""
        grouped_ndarray = self.grouping(self.weight_ndarray, self.max_number_per_group)
        return self.convert_ndarray_to_df(grouped_ndarray)
        

In [6]:
# Rebind the module-level tolerance read by check_std_for_each_group:
# from here on a group's std may be up to 1.09 times the overall std.
ALLOWED = 1.09
# Split the animals listed in data.xlsx into 5 groups.
grouping_tester = Animal_grouping_generator("data.xlsx", 5)
grouped_df = grouping_tester.get_grouped_data()
# Show the groups followed by their summary statistics.
# NOTE(review): describe() reports the sample std while the grouping check
# uses ndarray.std(), so the two figures are presumably not identical — confirm.
print(grouped_df, grouped_df.describe(), sep = '\n\n')

   Group 1  Group 2  Group 3  Group 4  Group 5
0    15.83    15.09    14.25    14.41    14.30
1    18.20    17.12    16.16    15.81    14.25
2    18.76    17.55    16.55    15.85    15.09
3    19.22    15.60    17.49    16.12    21.59
4    19.26    17.12    15.75    15.89    14.64
5    19.53    14.55    16.63    17.20    15.66
6    16.34    14.25    19.93    16.14    15.70
7    15.96    14.75    16.03    15.60    16.17
8    20.30    20.45    16.06    16.67    16.35
9    23.40    20.58    22.55    17.75    16.35

         Group 1    Group 2    Group 3   Group 4    Group 5
count  10.000000  10.000000  10.000000  10.00000  10.000000
mean   18.680000  16.706000  17.140000  16.14400  16.010000
std     2.297017   2.319517   2.390397   0.91511   2.117567
min    15.830000  14.250000  14.250000  14.41000  14.250000
25%    16.805000  14.835000  16.037500  15.82000  14.752500
50%    18.990000  16.360000  16.355000  16.00500  15.680000
75%    19.462500  17.442500  17.275000  16.53750  16.305000
max    23.400000  20.580000  22.550000  17.75000  21.590000

In [7]:
class Animal_grouping_generator_based_on_gender(Animal_grouping_generator):
    """
    Group the animals so that every group gets its share of both sexes.

    Females (rows whose "Sex" column is 'F') and males ('M') are first
    grouped separately with the inherited grouping method; afterwards each
    female group is paired with a randomly chosen male group, and the
    pairing is re-drawn until check_std_for_each_group accepts it or the
    retry budget is used up.
    """

    def __init__(self, filename, group_number):
        """Load the data, split it by sex and group each sex on its own."""
        super().__init__(filename, group_number)

        # female
        self.female_df = self.df[self.df["Sex"] == 'F']
        self.female_weight_ndarray = np.array(self.female_df["Weight"])
        self.female_sample_size = self.female_df.shape[0]
        self.female_mean = self.female_weight_ndarray.mean()
        self.female_std = self.female_weight_ndarray.std()
        self.female_number_per_group = self.female_sample_size // self.group_number
        self.female_max_number_per_group = self.female_number_per_group if self.female_sample_size % self.group_number == 0 else self.female_number_per_group + 1

        # male
        self.male_df = self.df[self.df["Sex"] == 'M']
        self.male_weight_ndarray = np.array(self.male_df["Weight"])
        self.male_sample_size = self.male_df.shape[0]
        # statistics of the males must come from the male weights
        self.male_mean = self.male_weight_ndarray.mean()
        self.male_std = self.male_weight_ndarray.std()
        self.male_number_per_group = self.male_sample_size // self.group_number
        self.male_max_number_per_group = self.male_number_per_group if self.male_sample_size % self.group_number == 0 else self.male_number_per_group + 1

        # grouped data
        self.grouped_female_data()
        self.grouped_male_data()

    def grouped_female_data(self):
        """Group the female weights and store them as ndarray, list and DataFrame."""
        self.grouped_female_ndarray = self.grouping(self.female_weight_ndarray, self.female_max_number_per_group)
        self.grouped_female_list = self.grouped_female_ndarray.tolist()
        self.grouped_female_df = self.convert_ndarray_to_df(self.grouped_female_ndarray)

    def grouped_male_data(self):
        """Group the male weights and store them as ndarray, list and DataFrame."""
        self.grouped_male_ndarray = self.grouping(self.male_weight_ndarray, self.male_max_number_per_group)
        self.grouped_male_list = self.grouped_male_ndarray.tolist()
        self.grouped_male_df = self.convert_ndarray_to_df(self.grouped_male_ndarray)

    def combining_two_sex(self):
        """
        Pair every female group with a distinct, randomly chosen male group.

        Returns an ndarray whose row i is female group i followed by the
        male group assigned to it.
        """
        # a random permutation of the male group indices: each is used once
        male_order = random.sample(range(self.group_number), self.group_number)

        grouped_list = [
            self.grouped_female_list[i] + self.grouped_male_list[male_index]
            for i, male_index in enumerate(male_order)
        ]

        return np.array(grouped_list)

    def grouping_with_two_gender(self):
        """
        Re-draw the female/male pairing until it passes the std check.

        Only a limited number of pairings exists, and possibly none of them
        passes, so the search is capped at 100 re-draws; when the last one is
        still rejected a message is printed and that pairing is returned
        anyway (best effort).
        """
        max_retries = 100
        grouped_ndarray = self.combining_two_sex()
        retries = 0
        while check_std_for_each_group(grouped_ndarray, self.group_number):
            if retries >= max_retries:
                print(f"Tried {retries} times.\nThe deviation of the data is too big. Please try again or adjust the constant.\n")
                break
            grouped_ndarray = self.combining_two_sex()
            retries += 1

        return grouped_ndarray

    def get_grouped_data(self):
        """Return the combined (female + male) groups as a DataFrame."""
        grouped_ndarray = self.grouping_with_two_gender()
        return self.convert_ndarray_to_df(grouped_ndarray)

    def get_grouped_female_data(self):
        """Return the female-only groups as a DataFrame."""
        return self.grouped_female_df

    def get_grouped_male_data(self):
        """Return the male-only groups as a DataFrame."""
        return self.grouped_male_df

In [8]:
# Rebind the module-level tolerance read by check_std_for_each_group:
# from here on a group's std may be up to 1.25 times the overall std.
ALLOWED = 1.25
# Group females and males separately (done in the constructor), then
# combine them into 5 mixed groups.
grouping_tester = Animal_grouping_generator_based_on_gender("data.xlsx", 5)
df = grouping_tester.get_grouped_data()
# Show the combined groups followed by their summary statistics.
print(df, df.describe(), sep = "\n\n")

   Group 1  Group 2  Group 3  Group 4  Group 5
0    16.35    21.59    16.34    14.25    14.25
1    15.60    17.12    16.03    14.55    14.64
2    16.63    17.55    14.75    14.25    16.12
3    15.96    17.20    16.06    15.75    15.60
4    15.09    14.30    15.66    15.81    15.09
5    15.85    14.41    15.89    16.14    15.70
6    16.67    16.16    16.55    16.17    15.83
7    17.49    16.35    20.30    18.20    19.26
8    17.75    17.12    20.45    18.76    20.58
9    19.93    19.53    23.40    19.22    22.55

         Group 1    Group 2    Group 3    Group 4    Group 5
count  10.000000  10.000000  10.000000  10.000000  10.000000
mean   16.732000  17.133000  17.543000  16.310000  16.962000
std     1.388435   2.180459   2.815765   1.833018   2.814359
min    15.090000  14.300000  14.750000  14.250000  14.250000
25%    15.877500  16.207500  15.925000  14.850000  15.217500
50%    16.490000  17.120000  16.200000  15.975000  15.765000
75%    17.285000  17.462500  19.362500  17.692500  18.475000
max    19.930000  21.590000  23.400000  19.220000  22.550000

In [9]:
# Female-only groups stored on grouping_tester by its constructor,
# shown together with their summary statistics.
df_f = grouping_tester.get_grouped_female_data()
print(df_f, df_f.describe(), sep = "\n\n")

   Group 1  Group 2  Group 3  Group 4  Group 5
0    16.35    21.59    16.34    14.25    14.25
1    15.60    17.12    16.03    14.55    14.64
2    16.63    17.55    14.75    14.25    16.12
3    15.96    17.20    16.06    15.75    15.60

         Group 1    Group 2    Group 3    Group 4    Group 5
count   4.000000   4.000000   4.000000   4.000000   4.000000
mean   16.135000  18.365000  15.795000  14.700000  15.152500
std     0.450222   2.158093   0.710516   0.714143   0.858967
min    15.600000  17.120000  14.750000  14.250000  14.250000
25%    15.870000  17.180000  15.710000  14.250000  14.542500
50%    16.155000  17.375000  16.045000  14.400000  15.120000
75%    16.420000  18.560000  16.130000  14.850000  15.730000
max    16.630000  21.590000  16.340000  15.750000  16.120000


In [10]:
# Male-only groups stored on grouping_tester by its constructor,
# shown together with their summary statistics.
df_m = grouping_tester.get_grouped_male_data()
print(df_m, df_m.describe(), sep = "\n\n")

   Group 1  Group 2  Group 3  Group 4  Group 5
0    15.81    15.66    15.09    15.09    14.30
1    16.14    15.89    15.70    15.85    14.41
2    16.17    16.55    15.83    16.67    16.16
3    18.20    20.30    19.26    17.49    16.35
4    18.76    20.45    20.58    17.75    17.12
5    19.22    23.40    22.55    19.93    19.53

         Group 1    Group 2    Group 3    Group 4    Group 5
count   6.000000   6.000000   6.000000   6.000000   6.000000
mean   17.383333  18.708333  18.168333  17.130000  16.311667
std     1.511882   3.145546   3.073873   1.695028   1.933840
min    15.810000  15.660000  15.090000  15.090000  14.300000
25%    16.147500  16.055000  15.732500  16.055000  14.847500
50%    17.185000  18.425000  17.545000  17.080000  16.255000
75%    18.620000  20.412500  20.250000  17.685000  16.927500
max    19.220000  23.400000  22.550000  19.930000  19.530000


### Conclusion
As you can see, the heaviest female piglet weighed 21.59 kg and the lightest weighed 14.25 kg. Likewise, among the males, the heaviest piglet weighed 23.40 kg and the lightest only 14.30 kg. With such a spread, it is really hard to arrange balanced groups by hand.

Fortunately, all we need to do now is run the program, and it will group the animals immediately. Thanks, Python, and good luck with your experiment!