# Create the phenotype files for the 50K and the 150K individuals

This split follows the order of the data releases, and therefore the different exome capture kits that were used for whole-exome sequencing (WES) of each group of individuals.

In [None]:
library(tidyverse)
library(data.table)

In [None]:
# Read the .fam file of the 50K exome release (read without a header row) and
# attach the six column names in one step.
ind50 <- setNames(
  read.table("~/UKBiobank_Yale_transfer/ukb28374_exomedata/ukb32285_exomespb_chr1_22.fam"),
  c("FID", "IID", "fatherID", "motherID", "sex", "phenotype")
)

In [None]:
# Preview the first six rows of the 50K .fam table.
head(ind50, n = 6L)

## f.3393

In [None]:
# Load the f.3393 (hearing aid) phenotype table: tab-separated, with a header row.
f3393_all <- read.table(
  file = "~/UKBiobank/phenotype_files/hearing_impairment/080421_UKBB_Hearing_aid_f3393_expandedwhite_6305cases_98082ctrl",
  header = TRUE,
  sep = "\t"
)

In [None]:
# Preview the first six rows of the f.3393 phenotype table.
head(f3393_all, n = 6L)

In [None]:
# Keep the f.3393 phenotype rows whose IID appears in the 50K .fam file.
# setDT() converts f3393_all to a data.table by reference.
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
f3393_50k <- setDT(f3393_all)[IID %in% ind50$IID]
# Number of individuals in the 50K subset.
nrow(f3393_50k)

In [None]:
# Keep the f.3393 phenotype rows whose IID is NOT in the 50K .fam file
# (the 150K group).
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
f3393_150k <- setDT(f3393_all)[!(IID %in% ind50$IID)]
# Number of individuals in the 150K subset.
nrow(f3393_150k)

## Number of cases/controls in each set for f.3393

In [None]:
# Case/control counts for f.3393 in each subset (numbers recorded from an earlier run).
# 50K group: 1755 cases and 22741 controls
f3393_50k[, .N, by = .(f3393)]
# 150K group: 4550 cases and 75341 controls
f3393_150k[, .N, by = .(f3393)]

## f.2247

In [None]:
# Load the f.2247 (hearing difficulty) phenotype table: tab-separated, with a header row.
f2247_all <- read.table(
  file = "~/UKBiobank/phenotype_files/hearing_impairment/080421_UKBB_Hearing_difficulty_f2247_expandedwhite_46237cases_98082ctrl",
  header = TRUE,
  sep = "\t"
)

In [None]:
# Preview the first six rows of the f.2247 phenotype table.
head(f2247_all, n = 6L)

In [None]:
# Keep the f.2247 phenotype rows whose IID appears in the 50K .fam file.
# setDT() converts f2247_all to a data.table by reference.
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
f2247_50k <- setDT(f2247_all)[IID %in% ind50$IID]
# Number of individuals in the 50K subset.
nrow(f2247_50k)

In [None]:
# Keep the f.2247 phenotype rows whose IID is NOT in the 50K .fam file
# (the 150K group).
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
f2247_150k <- setDT(f2247_all)[!(IID %in% ind50$IID)]
# Number of individuals in the 150K subset.
nrow(f2247_150k)

## Number of cases/controls in each set for f.2247

In [None]:
# Case/control counts for f.2247 in each subset (numbers recorded from an earlier run).
# 50K group: 12406 cases and 22741 controls
f2247_50k[, .N, by = .(f2247)]
# 150K group: 33831 cases and 75341 controls
f2247_150k[, .N, by = .(f2247)]

## f.2257

In [None]:
# Load the f.2257 (hearing with background noise) phenotype table:
# tab-separated, with a header row.
f2257_all <- read.table(
  file = "~/UKBiobank/phenotype_files/hearing_impairment/080421_UKBB_Hearing_noise_f2257_expandedwhite_66656cases_98082ctrl",
  header = TRUE,
  sep = "\t"
)

In [None]:
# Preview the first six rows of the f.2257 phenotype table.
head(f2257_all, n = 6L)

In [None]:
# Keep the f.2257 phenotype rows whose IID appears in the 50K .fam file.
# setDT() converts f2257_all to a data.table by reference.
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
f2257_50k <- setDT(f2257_all)[IID %in% ind50$IID]
# Number of individuals in the 50K subset.
nrow(f2257_50k)

In [None]:
# Keep the f.2257 phenotype rows whose IID is NOT in the 50K .fam file
# (the 150K group).
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
f2257_150k <- setDT(f2257_all)[!(IID %in% ind50$IID)]
# Number of individuals in the 150K subset.
nrow(f2257_150k)

## Number of cases/controls in each set for f.2257

In [None]:
# Case/control counts for f.2257 in each subset (numbers recorded from an earlier run).
# 50K group: 16603 cases and 22741 controls
f2257_50k[, .N, by = .(f2257)]
# 150K group: 50053 cases and 75341 controls
f2257_150k[, .N, by = .(f2257)]

## Combined f.2247 & f.2257

In [None]:
# Load the combined f.2247/f.2257 phenotype table: tab-separated, with a header row.
combined_all <- read.table(
  file = "~/UKBiobank/phenotype_files/hearing_impairment/080421_UKBB_Combined_f2247_f2257_expandedwhite_39049cases_98082ctrl",
  header = TRUE,
  sep = "\t"
)

In [None]:
# Preview the first six rows of the combined phenotype table.
head(combined_all, n = 6L)

In [None]:
# Keep the combined-phenotype rows whose IID appears in the 50K .fam file.
# setDT() converts combined_all to a data.table by reference.
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
combined_50k <- setDT(combined_all)[IID %in% ind50$IID]
# Number of individuals in the 50K subset.
nrow(combined_50k)

In [None]:
# Keep the combined-phenotype rows whose IID is NOT in the 50K .fam file
# (the 150K group).
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
combined_150k <- setDT(combined_all)[!(IID %in% ind50$IID)]
# Number of individuals in the 150K subset.
nrow(combined_150k)

In [None]:
# Case/control counts for the combined f.2247 & f.2257 phenotype in each subset
# (numbers recorded from an earlier run).
# 50K group: 10658 cases and 22741 controls
combined_50k[, .N, by = .(f2247_f2257)]
# 150K group: 28391 cases and 75341 controls
combined_150k[, .N, by = .(f2247_f2257)]

## Count the individuals with both exome data and QC'ed genotype array data (QC by Megan)

In [None]:
# Read the exome .fam file (read without a header row) and attach the six
# column names in one step.
exome_fam <- setNames(
  read.table('~/UKBiobank_Yale_transfer/ukb28374_exomedata/exome_data_OCT2020/ukb23155_s200631.fam'),
  c("FID", "IID", "fatherID", "motherID", "sex", "phenotype")
)

In [None]:
# Preview the first six rows of the exome .fam table.
head(exome_fam, n = 6L)

In [None]:
# Read the QC'ed genotype-array .fam file (read without a header row) and
# attach the six column names in one step.
genotype_fam <- setNames(
  read.table('~/UKBiobank_Yale_transfer/pleiotropy_geneticfiles/UKB_expandedwhite_qcgenotypefiles/UKB_expandedwhiteonly_phenotypeindepqc_410905indiv_528206snps_102720.fam'),
  c("FID", "IID", "fatherID", "motherID", "sex", "phenotype")
)

In [None]:
# Preview the first six rows of the genotype-array .fam table.
head(genotype_fam, n = 6L)

In [None]:
# Exome-sequenced individuals whose IID also appears in the genotype-array .fam.
# setDT() converts exome_fam to a data.table by reference.
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
exomed_wtih_genoarray <- setDT(exome_fam)[IID %in% genotype_fam$IID]

In [None]:
# Number of individuals with both exome and genotype-array data.
dim(exomed_wtih_genoarray)[1L]

In [None]:
# Read the two-column ID list of exome-sequenced white individuals (read
# without a header row) and name the columns FID and IID.
white_exome_fam <- setNames(
  read.table('~/UKBiobank_Yale_transfer/pleiotropy_R01/ukb43978_OCT2020/dc2325_phenotypes/030821_ukb42495_exomed_white_189010ind'),
  c("FID", "IID")
)

In [None]:
# Exome-sequenced white individuals whose IID also appears in the
# genotype-array .fam.
# setDT() converts white_exome_fam to a data.table by reference.
# `%in%` is used instead of `%chin%` because `%chin%` only accepts character
# vectors, while read.table() returns purely numeric IDs as integers.
exomed_white_genoarray <- setDT(white_exome_fam)[IID %in% genotype_fam$IID]

In [None]:
# Number of exome-sequenced white individuals that also have genotype-array data.
dim(exomed_white_genoarray)[1L]