In [1]:
import os
import numpy as np
import pandas as pd
import itertools
from datetime import datetime
from typing import Iterable
from typing import List, Optional
import matplotlib.pyplot as plt  

# Functions

In [2]:
def detect_markers_type(df):
    """
    Classify a marker table as microsatellites or SNPs.

    Every column after the first one is scanned. If any cell, rendered as a
    string, contains "/", the table is labelled "microsatellites"; otherwise
    it is labelled "SNPs".

    The label is printed, stored in the module-level variable `markers_type`,
    and returned.
    """
    global markers_type  # exported so later cells can read the detected type

    # The first column is skipped; only the remaining (marker) columns are scanned.
    stacked_cells = df[df.columns[1:]].stack()

    slash_found = False
    for cell in stacked_cells:
        if "/" in str(cell):
            slash_found = True
            break  # one hit is enough to decide

    if not slash_found:
        markers_type = "SNPs"
        print("Type of markers detected: SNPs")
        return markers_type

    markers_type = "microsatellites"
    print("Type of markers detected: Microsatellites")
    return markers_type


In [3]:
def micro_heterozygosity(df, count_nans_as_invalid=True):
    """
    Compute observed (Ho) and expected (He) heterozygosity per microsatellite
    locus, and the inbreeding coefficient F = 1 - mean(Ho) / mean(He).

    Input layout
    ------------
    - The FIRST column of df is treated as the sample/individual ID and ignored.
    - Every other column is a locus whose cells are genotype strings "A/B".
      Cells without a "/" are treated as invalid genotypes.

    Definitions
    -----------
    - Ho (per locus): number of genotypes whose two alleles differ, divided by
      the locus denominator (see `count_nans_as_invalid`).
    - He (per locus): 1 - sum(p_i ** 2), where p_i is the frequency of allele i
      among all alleles of the well-formed "A/B" genotypes at that locus.
      He is NaN when a locus has no well-formed genotype.

    Parameters
    ----------
    df : pd.DataFrame
        ID column followed by one column per locus.
    count_nans_as_invalid : bool, default True
        If True, NaN cells are dropped before counting, so the Ho denominator
        is the number of non-missing cells. If False, NaN cells are kept (as
        empty strings) and count as invalid entries, so the Ho denominator is
        the total number of rows.

    Returns
    -------
    pd.DataFrame
        Rows 'observed_heterozygosity' and 'expected_heterozygosity', one
        column per locus plus an 'average' column (mean across loci).

    Side effects
    ------------
    Sets the global variables `F` and `heterozygosity_table`, and prints F.
    """

    def split_genotype(cell):
        """Return the (left, right) alleles of an 'A/B' cell, or None if malformed."""
        if '/' not in cell:
            return None
        left, right = cell.split('/', 1)
        return left.strip(), right.strip()

    # First column is always the ID; all remaining columns are loci.
    id_col = df.columns[0]
    cols = [c for c in df.columns if c != id_col]

    observed = {}
    expected = {}

    for col in cols:
        series_raw = df[col]

        # Missing-value policy decides the Ho denominator.
        if count_nans_as_invalid:
            series = series_raw.dropna().astype(str)
        else:
            series = series_raw.fillna('').astype(str)

        total = len(series)
        if total == 0:
            observed[col] = np.nan
            expected[col] = np.nan
            continue

        genotypes = [g for g in map(split_genotype, series) if g is not None]

        # Observed heterozygosity: share of cells whose two alleles differ.
        n_het = sum(1 for left, right in genotypes if left != right)
        observed[col] = n_het / total

        # Expected heterozygosity from ALLELE frequencies (both alleles of
        # every well-formed genotype contribute), not from genotype frequencies.
        if not genotypes:
            expected[col] = np.nan
            continue
        allele_counts = pd.Series(
            [allele for genotype in genotypes for allele in genotype]
        ).value_counts()
        allele_freqs = allele_counts / allele_counts.sum()
        expected[col] = 1 - float(np.sum(allele_freqs ** 2))

    # Build the result table (row labels match snp_heterozygosity).
    result = pd.DataFrame(
        [observed, expected],
        index=["observed_heterozygosity", "expected_heterozygosity"]
    )
    result["average"] = result.mean(axis=1)

    # Averages across loci and the inbreeding coefficient.
    Ho_avg = result.loc["observed_heterozygosity", cols].mean()
    He_avg = result.loc["expected_heterozygosity", cols].mean()
    if He_avg is not None and not np.isnan(He_avg) and He_avg != 0:
        F_value = 1 - (Ho_avg / He_avg)
    else:
        # F is undefined when the mean expected heterozygosity is zero or NaN.
        F_value = np.nan

    # Export for the downstream heritability step.
    globals()['F'] = F_value
    globals()['heterozygosity_table'] = result

    print("Inbreeding coefficient F:", F_value)
    print("")

    return result


In [4]:
def micro_pairwise_relatedness(df, decimals: int = 4) -> pd.DataFrame:
    """
    For every pair of rows in df, compute relatedness = matches / total_possible_elements.

    Each data cell is split on "," into a list of elements (a cell without a
    comma is a single element, so an "A/B" genotype string is compared as a
    whole). For each column, elements of the two rows are compared position by
    position; the longer of the two lists sets the number of possible matches.

    Behavior:
    - If df is None, the global `df_gen` is used; a ValueError is raised if it
      does not exist.
    - If df has a column named 'individual', pair labels are "<ind_i>_<ind_j>"
      built from those values. Otherwise labels are 1-based row positions
      ("1_2", "1_3", ...).
    - The 'individual' column (if present) is NOT used for the relatedness
      calculation; every other column is.
    - A pair with no comparable elements gets relatedness 0.0.

    Parameters
    ----------
    df : pd.DataFrame or None
        One row per individual.
    decimals : int
        Number of decimals the relatedness values are rounded to.

    Returns
    -------
    pd.DataFrame
        Columns 'pair' and 'relatedness' (always present, even with no pairs).
        The same object is stored in the globals `r` and `relatedness_table`.
    """

    # Default to the global df_gen if no df was passed.
    if df is None:
        df = globals().get("df_gen")
        if df is None:
            raise ValueError("No dataframe provided and global df_gen is not defined.")

    # Columns used for relatedness calculations (exclude 'individual' if present).
    data_cols = [c for c in df.columns if c != "individual"]

    def split_cell(cell: object) -> list:
        """Split a cell on commas; missing or blank cells give an empty list."""
        if pd.isna(cell):
            return []
        s = str(cell).strip()
        if s == "":
            return []
        return [elem.strip() for elem in s.split(",")]

    nrows = len(df)

    # Pre-split every row once. The row is fetched a single time instead of
    # once per cell, which avoids rebuilding the row Series for every column.
    split_cache = []
    for r_i in range(nrows):
        row = df.iloc[r_i]
        split_cache.append({col: split_cell(row[col]) for col in data_cols})

    # Pair labels use the 'individual' values when available.
    if "individual" in df.columns:
        individual_names = df["individual"].astype(str).tolist()
    else:
        individual_names = None

    results = []
    for i, j in itertools.combinations(range(nrows), 2):
        if individual_names is not None:
            pair_label = f"{individual_names[i]}_{individual_names[j]}"
        else:
            pair_label = f"{i+1}_{j+1}"

        total_matches = 0
        total_possible = 0

        for col in data_cols:
            elems_i = split_cache[i][col]
            elems_j = split_cache[j][col]

            # Unpaired trailing elements count as possible-but-unmatched.
            total_possible += max(len(elems_i), len(elems_j))
            total_matches += sum(1 for a, b in zip(elems_i, elems_j) if a == b)

        relatedness = (total_matches / total_possible) if total_possible > 0 else 0.0
        results.append({
            "pair": pair_label,
            "relatedness": round(relatedness, decimals)
        })

    # Export global relatedness. Explicit columns keep the schema stable even
    # when there are no pairs (fewer than two rows).
    global r, relatedness_table
    r = pd.DataFrame(results, columns=["pair", "relatedness"])
    relatedness_table = r
    return r


In [5]:
def snp_heterozygosity(df, count_nans_as_invalid=True):
    """
    Compute observed (Ho) and expected (He) heterozygosity per SNP column, and
    the inbreeding coefficient F = 1 - mean(Ho) / mean(He).

    Accepts SNP genotypes encoded as:
      0 -> homozygote REF/REF
      1 -> heterozygote REF/ALT
      2 -> homozygote ALT/ALT

    Cells are parsed with `pd.to_numeric(errors="coerce")`. Anything that is
    not exactly 0, 1 or 2 (including fractional values such as 1.5) is treated
    as an invalid genotype and never counted as 0/1/2.

    PARAMETERS:
      df: DataFrame whose FIRST column is the sample ID (ignored) and whose
          remaining columns are SNPs.
      count_nans_as_invalid: if True, NaNs are excluded from the Ho
          denominator (denominator = number of non-missing cells); if False,
          the Ho denominator is the total number of rows.

    Per-locus definitions:
      Ho = (number of genotypes equal to 1) / denominator
      He = 1 - (p**2 + q**2), with p and q the REF and ALT allele frequencies
           among the valid 0/1/2 genotypes; NaN if there is no valid genotype.

    RETURNS:
      DataFrame with rows 'observed_heterozygosity' and
      'expected_heterozygosity', one column per SNP plus 'average'.

    It also creates:
      - global variable F
      - global variable heterozygosity_table
    and prints F.
    """

    # The first column is the sample ID and is ignored.
    id_col = df.columns[0]
    cols = [c for c in df.columns if c != id_col]

    observed = {}
    expected = {}

    for col in cols:
        series_raw = df[col]
        series_valid = series_raw.dropna()

        # The flag only changes the Ho denominator, not which cells are parsed.
        total = len(series_valid) if count_nans_as_invalid else len(series_raw)

        if total == 0:
            observed[col] = np.nan
            expected[col] = np.nan
            continue

        # Keep only exact 0/1/2 codes. Unlike int(x), this does not truncate
        # fractional values (e.g. 1.5) into a valid-looking genotype.
        numeric = pd.to_numeric(series_valid, errors="coerce").astype(float)
        genotypes_valid = numeric[numeric.isin([0, 1, 2])]

        n0 = int((genotypes_valid == 0).sum())  # homozygote REF/REF
        n1 = int((genotypes_valid == 1).sum())  # heterozygote REF/ALT
        n2 = int((genotypes_valid == 2).sum())  # homozygote ALT/ALT
        n_valid = n0 + n1 + n2

        # Observed heterozygosity: proportion of heterozygotes.
        observed[col] = n1 / total

        # Expected heterozygosity from allele frequencies.
        if n_valid == 0:
            He = np.nan
        else:
            denom = 2 * n_valid               # total number of alleles
            p = (2 * n0 + n1) / denom         # REF allele frequency
            q = (2 * n2 + n1) / denom         # ALT allele frequency
            He = 1 - (p**2 + q**2)            # equivalent to 2*p*q

        expected[col] = He

    # Build final result table.
    result = pd.DataFrame(
        [observed, expected],
        index=["observed_heterozygosity", "expected_heterozygosity"]
    )
    result["average"] = result.mean(axis=1)

    # Averages across loci and the inbreeding coefficient.
    Ho_avg = result.loc["observed_heterozygosity", cols].mean()
    He_avg = result.loc["expected_heterozygosity", cols].mean()
    if He_avg is not None and not np.isnan(He_avg) and He_avg != 0:
        F_value = 1 - (Ho_avg / He_avg)
    else:
        # F is undefined when the mean expected heterozygosity is zero or NaN.
        F_value = np.nan

    # Export for the downstream heritability step.
    globals()['F'] = F_value
    globals()['heterozygosity_table'] = result

    print("Inbreeding coefficient F:", F_value)
    print("")

    return result

In [6]:
def snp_pairwise_relatedness(df=None, decimals: int = 4) -> pd.DataFrame:
    """
    Compute pairwise relatedness as Pearson correlation between rows (individuals).

    Notes:
      - If df is None, the global `df_gen` is used; a ValueError is raised if
        neither is available.
      - First column is treated as the sample ID.
      - The remaining columns are converted with `pd.to_numeric`; values that
        cannot be parsed become NaN.
      - The correlation is taken from `DataFrame.corr(method='pearson')` on the
        transposed data; undefined correlations (NaN) are reported as 0.0.
      - Pairs are emitted in the order of `itertools.combinations` over the
        rows, labelled "<id_i>_<id_j>".
      - With fewer than two rows an empty table with the columns 'pair' and
        'relatedness' is returned.
      - Produces global variables r and relatedness_table.

    Parameters
    ----------
    df : pd.DataFrame or None
        One row per individual: ID column followed by SNP columns.
    decimals : int
        Number of decimals the relatedness values are rounded to.

    Returns
    -------
    pd.DataFrame
        Columns 'pair' and 'relatedness'.
    """

    print("Step 1/6: Checking input dataframe...")
    if df is None:
        # Look the fallback up by name so that a missing global produces the
        # ValueError below instead of a NameError.
        df = globals().get("df_gen")

    if df is None:
        raise ValueError("No dataframe provided and global df_gen is not defined.")

    nrows = len(df)
    if nrows < 2:
        print("Not enough rows to compute pairwise relatedness.")
        r = pd.DataFrame(columns=["pair", "relatedness"])
        globals()['r'] = r
        globals()['relatedness_table'] = r
        return r

    print("Step 2/6: Extracting individual IDs (first column)...")
    id_col = df.columns[0]
    ids = df[id_col].astype(str).tolist()

    print("Step 3/6: Converting SNP data to numeric format...")
    # Convert remaining columns to numeric, invalid -> NaN
    data = df.drop(columns=[id_col]).apply(pd.to_numeric, errors='coerce')

    print("Step 4/6: Computing correlation matrix (this may take a while)...")
    # Transpose so columns = individuals
    data_T = data.T
    data_T.columns = ids

    corr_matrix = data_T.corr(method='pearson')

    print("Step 5/6: Cleaning correlation matrix (replacing NaN with 0.0)...")
    corr_values = corr_matrix.fillna(0.0).to_numpy()

    print("Step 6/6: Building pairwise relatedness output...")
    labels = corr_matrix.columns.tolist()

    # Strict upper triangle, row-major: the same order as
    # itertools.combinations(labels, 2). Positional indexing avoids one
    # label lookup per pair and stays well-defined if two IDs are equal.
    idx_i, idx_j = np.triu_indices(len(labels), k=1)

    r = pd.DataFrame({
        "pair": [f"{labels[i]}_{labels[j]}" for i, j in zip(idx_i, idx_j)],
        "relatedness": [round(float(v), decimals) for v in corr_values[idx_i, idx_j]],
    })
    globals()['r'] = r
    globals()['relatedness_table'] = r

    print("Done! Pairwise relatedness successfully computed.")
    return r


In [7]:
def phenotypic_similarity(
    df: Optional[pd.DataFrame],
    trait_cols: Optional[List[str]] = None,
    decimals: int = 4,
) -> pd.DataFrame:
    """
    Compute bounded phenotypic similarity in [0,1] for each trait and each pair of rows,
    using R_k = max_k - min_k (range) for normalization.

    similarity_k(i,j) = (R_k - abs(x_i - x_j)) / R_k

    Behavior:
    - If df is None, the function will try to use a global DataFrame named `mor`.
      If `mor` is not found, a ValueError is raised.
    - The FIRST column is always treated as the sample/individual ID; its values
      (as strings) build the pair labels "<id_i>_<id_j>".
    - If trait_cols is None, numeric columns are auto-selected excluding the
      first column.
    - If R_k is 0 (all values identical) or undefined, similarity is set to 1
      for that trait.
    - If either x_i or x_j is NaN, similarity is NaN for that trait.
    - Pairs are emitted in the order of `itertools.combinations` over the rows.
    - The result always has the columns 'pair' + selected traits, even when df
      has fewer than two rows (no pairs).
    - The final DataFrame is assigned to the global variables `z` and
      `phenotypic_similarity_table`, and also returned.

    Parameters
    ----------
    df : pd.DataFrame or None
        Input dataframe containing phenotypic trait columns. If None, uses global `mor`.
    trait_cols : list[str] | None
        List of trait column names to use. If None, auto-select numeric columns
        (excluding the first column).
    decimals : int
        Number of decimals to round similarity values.

    Returns
    -------
    pd.DataFrame
        Columns: 'pair' and one column per selected trait.

    Raises
    ------
    ValueError
        No df and no global `mor`; df without columns; the ID column requested
        as a trait; no selectable trait.
    TypeError
        df is not a DataFrame; trait_cols is not a list/tuple; a requested
        trait is not numeric.
    KeyError
        A requested trait column does not exist.
    """
    # fallback to global 'mor' if df not provided
    if df is None:
        if 'mor' not in globals():
            raise ValueError(
                "df not provided and no global DataFrame named 'mor' found. "
                "Please pass df or create 'mor'."
            )
        df = globals()['mor']

    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be a pandas DataFrame or None.")

    cols = list(df.columns)
    if len(cols) == 0:
        raise ValueError("Provided DataFrame has no columns.")

    # The FIRST column is treated as the ID column and excluded from trait selection
    id_col = cols[0]
    data_cols = cols[1:]  # candidate trait columns

    # select traits
    if trait_cols is not None:
        if not isinstance(trait_cols, (list, tuple)):
            raise TypeError("trait_cols must be a list/tuple of column names or None.")
        missing = [c for c in trait_cols if c not in cols]
        if missing:
            raise KeyError(f"The following requested columns do not exist in the DataFrame: {missing}")
        # disallow using the id column as a trait
        if id_col in trait_cols:
            raise ValueError(f"The first column ('{id_col}') is treated as ID and cannot be used as a trait.")
        non_numeric = [c for c in trait_cols if not pd.api.types.is_numeric_dtype(df[c])]
        if non_numeric:
            raise TypeError(f"The following columns are not numeric: {non_numeric}")
        selected_traits = list(trait_cols)
    else:
        # auto-select numeric columns excluding the first column
        selected_traits = [c for c in data_cols if pd.api.types.is_numeric_dtype(df[c])]

    if not selected_traits:
        numeric_cols = [c for c in data_cols if pd.api.types.is_numeric_dtype(df[c])]
        raise ValueError(
            "No selectable traits found (selected_traits is empty). "
            f"Numeric columns available (excluding first column '{id_col}'): {numeric_cols}."
        )

    trait_frame = df[selected_traits]

    # Per-trait range (max - min), in the same order as selected_traits.
    ranges = (trait_frame.max() - trait_frame.min()).to_numpy(dtype=float, na_value=np.nan)
    values = trait_frame.to_numpy(dtype=float, na_value=np.nan)

    nrows = len(df)
    id_values = df.iloc[:, 0].astype(str).tolist()

    # Row-index pairs of the strict upper triangle, row-major: the same order
    # as itertools.combinations(range(nrows), 2).
    idx_i, idx_j = np.triu_indices(nrows, k=1)
    pair_labels = [f"{id_values[i]}_{id_values[j]}" for i, j in zip(idx_i, idx_j)]

    # All pairwise absolute differences at once: shape (n_pairs, n_traits).
    # A NaN on either side propagates into diff.
    diff = np.abs(values[idx_i] - values[idx_j])

    # Traits with zero or undefined range cannot be normalised; they get 1.0.
    degenerate = np.isnan(ranges) | (ranges == 0)
    safe_ranges = np.where(degenerate, 1.0, ranges)  # placeholder avoids 0-division

    sim = np.clip((safe_ranges - diff) / safe_ranges, 0.0, 1.0)
    sim[:, degenerate] = 1.0
    sim[np.isnan(diff)] = np.nan  # missing phenotype wins over every other rule
    sim = np.round(sim, decimals)

    # create and export global z
    global z, phenotypic_similarity_table
    z = pd.DataFrame(sim, columns=selected_traits)
    z.insert(0, "pair", pair_labels)
    phenotypic_similarity_table = z
    return z


In [8]:
def heritability(
    r_df: Optional[pd.DataFrame] = None,
    z_df: Optional[pd.DataFrame] = None,
    r_col: str = "relatedness",
    key: str = "pair",
    F: Optional[float] = None,
    ddof: int = 0,
    n_bootstrap: int = 1000,
    ci: float = 0.95,
    random_state: Optional[int] = None
) -> pd.DataFrame:
    """
    Computes per-trait heritability with bootstrap standard errors and confidence intervals.

    For each trait column of z_df the estimate is

        h2 = cov(r, z) / (2 * var(r)) * (1 + F)

    where r is the relatedness column of r_df and the rows of both tables are
    matched on `key` (inner join). Covariance and variance use the same `ddof`.

    Behavior:
      - If r_df is None, tries to use global variable `r`.
      - If z_df is None, tries to use global variable `z`.
      - If F is None, tries to use global variable `F`.
      - Raises ValueError with clear message if any required input is missing.
      - Bootstrap resamples the merged rows with replacement; replicates whose
        relatedness variance is zero are skipped.

    Parameters
    ----------
    r_df, z_df : pd.DataFrame or None
        Relatedness table and similarity table sharing the `key` column.
    r_col : str
        Name of the relatedness column in r_df.
    key : str
        Name of the join column.
    F : float or None
        Inbreeding coefficient used in the (1 + F) factor.
    ddof : int
        Delta degrees of freedom for both variance and covariance.
    n_bootstrap : int
        Number of bootstrap replicates (must be >= 1).
    ci : float
        Confidence level, strictly between 0 and 1.
    random_state : int or None
        Seed passed to `np.random.default_rng`.

    Returns a DataFrame (values rounded to 4 decimals) with columns:
      - trait
      - heritability (point estimate)
      - standard_error (bootstrap SE)
      - ci_lower (bootstrap percentile CI)
      - ci_upper
    Also assigns the returned DataFrame to global `heritability_table` and
    prints it.

    Raises
    ------
    ValueError
        Missing inputs, invalid n_bootstrap / ci / ddof, empty merge,
        non-numeric or missing values, or zero relatedness variance.
    RuntimeError
        Every bootstrap replicate had zero relatedness variance.
    """

    # --- fallback to globals if not provided ---
    if r_df is None:
        if 'r' in globals():
            r_df = globals()['r']
        else:
            raise ValueError("r_df not provided and no global 'r' found. Please pass r_df or create global 'r'.")

    if z_df is None:
        if 'z' in globals():
            z_df = globals()['z']
        else:
            raise ValueError("z_df not provided and no global 'z' found. Please pass z_df or create global 'z'.")

    if F is None:
        if 'F' in globals():
            F = globals()['F']
        else:
            raise ValueError(
                "F not provided and no global 'F' found. "
                "Please pass F or run micro_heterozygosity() / snp_heterozygosity() first."
            )
    F = float(F)

    # ------------ Validate settings ----------
    B = int(n_bootstrap)
    if B < 1:
        raise ValueError("n_bootstrap must be a positive integer.")
    if not 0 < ci < 1:
        raise ValueError("ci must be strictly between 0 and 1.")

    # ------------ Merge ----------------------
    merged = pd.merge(r_df[[key, r_col]], z_df, on=key, how="inner")
    if merged.empty:
        raise ValueError("Merge resulted in empty dataframe. Check keys in r_df and z_df.")

    n_pairs = merged.shape[0]
    # Shared divisor so that covariance and variance use the same ddof.
    divisor = n_pairs - ddof
    if divisor <= 0:
        raise ValueError("ddof must be smaller than the number of merged pairs.")

    # ------------ Extract and validate r ------
    r = pd.to_numeric(merged[r_col], errors="coerce")
    if r.isna().any():
        raise ValueError(f"Non-numeric values found in relatedness column '{r_col}'.")

    var_r = r.var(ddof=ddof)
    if var_r == 0 or np.isclose(var_r, 0):
        raise ValueError("Variance of relatedness is zero; cannot compute heritability.")

    # ------------ Trait columns ---------------
    trait_cols = [c for c in merged.columns if c not in {key, r_col}]
    if not trait_cols:
        raise ValueError("No trait columns found after merging r_df and z_df.")

    # ------------ Center data -----------------
    z = merged[trait_cols].apply(pd.to_numeric, errors="coerce")
    if z.isna().any().any():
        raise ValueError("Missing or non-numeric values detected in trait columns after conversion.")

    r_values = r.to_numpy(dtype=float)
    z_values = z.to_numpy(dtype=float)

    def _estimate(r_arr, z_arr, var_r_arr):
        """Heritability per trait for one (re)sample, given its relatedness variance."""
        r_c = r_arr - r_arr.mean()
        z_c = z_arr - z_arr.mean(axis=0)
        cov = (r_c.reshape(-1, 1) * z_c).sum(axis=0) / divisor
        return cov / (2.0 * var_r_arr) * (1.0 + F)

    # ------------ Point estimates --------------
    h2_point = _estimate(r_values, z_values, var_r)

    # ------------ Bootstrap --------------------
    rng = np.random.default_rng(random_state)
    boot_estimates = np.full((B, len(trait_cols)), np.nan)

    for b in range(B):
        idx = rng.integers(0, n_pairs, size=n_pairs)
        r_s = r_values[idx]
        z_s = z_values[idx, :]

        var_r_s = np.var(r_s, ddof=ddof)
        if var_r_s == 0 or np.isclose(var_r_s, 0):
            # skip this bootstrap replicate (invalid); its row stays NaN
            continue

        boot_estimates[b, :] = _estimate(r_s, z_s, var_r_s)

    # Remove invalid bootstrap iterations
    valid = ~np.all(np.isnan(boot_estimates), axis=1)
    boot_valid = boot_estimates[valid]

    if boot_valid.shape[0] == 0:
        raise RuntimeError("All bootstrap replicates invalid (variance of r was zero in all replicates).")

    # ------------ Standard errors + CI ---------
    se = np.nanstd(boot_valid, axis=0, ddof=1)

    alpha = (1 - ci) / 2
    ci_lower = np.nanpercentile(boot_valid, 100 * alpha, axis=0)
    ci_upper = np.nanpercentile(boot_valid, 100 * (1 - alpha), axis=0)

    # ------------ Final output -----------------
    out = pd.DataFrame({
        "trait": trait_cols,
        "heritability": h2_point,
        "standard_error": se,
        "ci_lower": ci_lower,
        "ci_upper": ci_upper
    }).round(4)

    # assign to global convenience variable
    globals()['heritability_table'] = out
    print('n_bootstrap:', n_bootstrap)
    print('')
    print('heritability_table:')
    print('')
    print(out)
    print('')
    print('To access more results, type: F, heterozygosity_table, relatedness_table, phenotypic_similarity_table, heritability_table.')
    print('To save all data, type:  save_run()')
    return out


# Compilation

In [9]:
def h2(markers, traits):
    """
    Run the full heritability pipeline on a marker table and a trait table.

    Steps, in order:
      1. Detect the marker type with `detect_markers_type(markers)`.
      2. Heterozygosity and pairwise relatedness with the functions matching
         the detected type (`micro_*` for microsatellites, `snp_*` for SNPs).
      3. Phenotypic similarity from `traits`.
      4. Heritability, called without arguments, so it reads the results the
         previous steps stored in global variables.

    Expected formats:
    -----------------
    Microsatellites:
        - At least one marker cell contains '/', alleles separated by '/'
        - Examples: '154/160', '202/202'

    SNPs:
        - No marker cell contains '/'
        - The SNP steps only use genotypes coded 0, 1 or 2; other values are
          treated as invalid/missing by those steps.

    Nothing is returned; results are printed and left in the globals written
    by the individual steps.
    """

    detected = detect_markers_type(markers)

    # Guard first: anything other than the two known labels is rejected.
    if detected not in ('microsatellites', 'SNPs'):
        raise ValueError(
            f"Unrecognized markers_type '{detected}'.\n\n"
            "Valid marker types and required formats:\n"
            "  • microsatellites → cells contain '/', e.g. '150/158'\n"
            "  • SNPs            → numeric or allele codes without '/', e.g. 0/1/2 or 'AA'\n\n"
            "Please check your markers DataFrame format."
        )

    # Genetic side: heterozygosity (sets F) then pairwise relatedness.
    if detected == 'microsatellites':
        micro_heterozygosity(markers)
        micro_pairwise_relatedness(markers)
    else:
        snp_heterozygosity(markers)
        snp_pairwise_relatedness(markers)

    # Phenotypic side.
    phenotypic_similarity(traits)

    # Combine both sides.
    heritability()


# Tests

### Chili - microsatellites

In [10]:
# Chili data set: marker genotypes and trait measurements, read from CSV
# files addressed relative to the current working directory.
markers = pd.read_csv('./chili/gen.csv')
traits = pd.read_csv('./chili/mor.csv')

In [11]:
markers

Unnamed: 0,individual,Bd12,Ng10,Ng18,Ng20,Ng6,Ng7,Ng8
0,ind_1,2/2,1/1,4/4,1/1,2/2,0/0,2/2
1,ind_2,2/2,1/1,4/4,1/1,2/2,0/0,2/2
2,ind_3,2/2,1/1,3/3,1/1,2/2,2/2,2/2
3,ind_4,2/2,1/1,4/4,1/1,2/2,2/2,2/2
4,ind_5,2/2,1/1,4/4,1/1,2/2,2/2,2/2
...,...,...,...,...,...,...,...,...
109,ind_110,2/2,2/2,2/2,1/1,1/1,3/3,1/1
110,ind_111,0/0,2/2,2/2,1/1,1/1,3/3,1/1
111,ind_112,1/1,2/2,2/2,1/1,1/1,3/3,1/1
112,ind_113,1/1,2/2,2/2,1/1,1/1,3/3,1/1


In [12]:
traits

Unnamed: 0,individual,stem_width,stem_height,first_flower_day,fruits_number,fruit_length,fruit_diameter,fruit_weight,seeds_number,yield
0,ind_1,1.52,78.0,44.0,13,23.18,3.39,48.08,135.00,624.98
1,ind_2,1.36,74.0,45.0,9,25.05,3.08,48.10,164.00,432.90
2,ind_3,1.43,69.0,63.0,8,17.75,2.03,19.05,47.00,152.40
3,ind_4,1.53,68.0,44.0,12,21.50,3.04,46.58,156.00,558.90
4,ind_5,1.34,84.0,44.0,6,24.88,3.38,54.05,148.33,324.30
...,...,...,...,...,...,...,...,...,...,...
109,ind_110,1.29,45.0,83.0,0,3.69,2.59,8.25,51.64,0.00
110,ind_111,0.77,33.0,57.0,0,3.69,2.59,8.25,51.64,0.00
111,ind_112,1.52,59.0,69.0,0,3.69,2.59,8.25,51.64,0.00
112,ind_113,1.04,37.5,52.0,10,3.29,2.53,6.70,49.00,67.00


In [13]:
h2(markers,traits)

Type of markers detected: Microsatellites
Inbreeding coefficient F: 1.0

n_bootstrap: 1000

heritability_table:

              trait  heritability  standard_error  ci_lower  ci_upper
0        stem_width        0.0911          0.0075    0.0770    0.1054
1       stem_height        0.1483          0.0069    0.1350    0.1628
2  first_flower_day        0.2005          0.0074    0.1857    0.2142
3     fruits_number        0.1388          0.0107    0.1185    0.1604
4      fruit_length        0.9343          0.0090    0.9174    0.9518
5    fruit_diameter        0.4437          0.0107    0.4230    0.4648
6      fruit_weight        0.7902          0.0092    0.7728    0.8080
7      seeds_number        0.6425          0.0081    0.6265    0.6576
8             yield        0.1907          0.0086    0.1742    0.2083

To access more results, type: F, heterozygosity_table, relatedness_table, phenotypic_similarity_table, heritability_table.
To save all data, type:  save_run()


### Maize - SNPs

In [14]:
# Maize data set: rebinds `markers` and `traits`, replacing the tables loaded
# for the previous data set. Paths are relative to the working directory.
markers = pd.read_csv('./maize/maize_markers.csv')
traits = pd.read_csv('./maize/maize_mean_pheno.csv')

In [17]:
h2(markers,traits)

Type of markers detected: SNPs
Inbreeding coefficient F: 0.9740991551394081

Step 1/6: Checking input dataframe...
Step 2/6: Extracting individual IDs (first column)...
Step 3/6: Converting SNP data to numeric format...
Step 4/6: Computing correlation matrix (this may take a while)...
Step 5/6: Cleaning correlation matrix (replacing NaN with 0.0)...
Step 6/6: Building pairwise relatedness output...
Done! Pairwise relatedness successfully computed.
n_bootstrap: 1000

heritability_table:

           trait  heritability  standard_error  ci_lower  ci_upper
0    grain.yield        0.2164          0.0076    0.2020    0.2318
1   grain.number        0.1716          0.0081    0.1572    0.1887
2      seed.size        0.1145          0.0071    0.1007    0.1281
3       anthesis        0.2049          0.0088    0.1873    0.2216
4        silking        0.2150          0.0083    0.1976    0.2309
5   plant.height        0.0469          0.0076    0.0318    0.0621
6  tassel.height        0.0679         

In [18]:
heritability_table

Unnamed: 0,trait,heritability,standard_error,ci_lower,ci_upper
0,grain.yield,0.2164,0.0078,0.2015,0.2319
1,grain.number,0.1716,0.0079,0.1569,0.1881
2,seed.size,0.1145,0.0067,0.1007,0.1276
3,anthesis,0.2049,0.0088,0.1878,0.2216
4,silking,0.215,0.008,0.1986,0.2292
5,plant.height,0.0469,0.008,0.031,0.0631
6,tassel.height,0.0679,0.0092,0.0501,0.086
7,ear.height,0.0535,0.0082,0.0368,0.0693


### Mice - SNPs

In [19]:
# Mice data set: rebinds `markers` and `traits`, replacing the tables loaded
# for the previous data set. Paths are relative to the working directory.
markers = pd.read_csv('./mice/mice_markers.csv')
traits = pd.read_csv('./mice/mice_traits.csv')

In [20]:
h2(markers,traits)

Type of markers detected: SNPs
Inbreeding coefficient F: 0.026500146367681388

Step 1/6: Checking input dataframe...
Step 2/6: Extracting individual IDs (first column)...
Step 3/6: Converting SNP data to numeric format...
Step 4/6: Computing correlation matrix (this may take a while)...
Step 5/6: Cleaning correlation matrix (replacing NaN with 0.0)...
Step 6/6: Building pairwise relatedness output...
Done! Pairwise relatedness successfully computed.
n_bootstrap: 1000

heritability_table:

                trait  heritability  standard_error  ci_lower  ci_upper
0  Obesity.BodyLength         0.002          0.0007    0.0005    0.0035

To access more results, type: F, heterozygosity_table, relatedness_table, phenotypic_similarity_table, heritability_table.
To save all data, type:  save_run()
