In [1]:
%%capture
%run shared.ipynb

## Analyses used in thesis

These are among the statistical analyses used in the BA thesis. I am in the process of making this notebook tidier and more readable for others. 

In [2]:
#Function used to create table 2 (p. 32)

orientation_keys = {
    "LG" : (LG_plus, LG_only, "Gay or Lesbian"),
    "L" : (lesbian, only_lesbian, "Lesbian"),
    "G" : (gay, only_gay, "Gay"),
    "NBLG" : (nb_lg, only_nb_lg, "Gay or Lesbian (nonbinary)"),
    "Bi" : (bi_plus, bi_only, "Bi+"),
    "Q" : (q_plus, q_only, "Queer"),
    "Fluid" : (fluid_plus, fluid_only, "Fluid"),
    "Ace" : (ace_plus, ace_only, "Asexual and/or aromantic"),
    "Het" : (het_plus, het_only, "Heterosexual"),
    "Questioning" : (questioning_plus, questioning_only, "Questioning"),
    "None" : (none_plus, none_only, "NONE")
}


def print_group_info(key):
    x_plus, x_only, x_label = orientation_keys[key]
    
    x_only_count = len(x_only)
    x_plus_count = len(x_plus)
        
    x_only_frac = x_only_count/x_plus_count
    print(x_label, "respondents (inclusive):", len(x_plus))
    print(x_label, "only respondents:", len(x_only))
    print (f"{x_only_frac:.1%} of those who selected", x_label, "selected ONLY this orientation")
    

for key in orientation_keys.keys():
    print_group_info(key)
    print()

Gay or Lesbian respondents (inclusive): 291
Gay or Lesbian only respondents: 146
50.2% of those who selected Gay or Lesbian selected ONLY this orientation

Lesbian respondents (inclusive): 153
Lesbian only respondents: 78
51.0% of those who selected Lesbian selected ONLY this orientation

Gay respondents (inclusive): 103
Gay only respondents: 61
59.2% of those who selected Gay selected ONLY this orientation

Gay or Lesbian (nonbinary) respondents (inclusive): 35
Gay or Lesbian (nonbinary) only respondents: 7
20.0% of those who selected Gay or Lesbian (nonbinary) selected ONLY this orientation

Bi+ respondents (inclusive): 304
Bi+ only respondents: 101
33.2% of those who selected Bi+ selected ONLY this orientation

Queer respondents (inclusive): 280
Queer only respondents: 31
11.1% of those who selected Queer selected ONLY this orientation

Fluid respondents (inclusive): 89
Fluid only respondents: 8
9.0% of those who selected Fluid selected ONLY this orientation

Asexual and/or aromanti

In [3]:
np.mean(tnb_plus.Alltid_velkommen), np.std(tnb_plus.Alltid_velkommen)

(0.9593908629441624, 1.1257825860185013)

In [4]:
np.mean(cis.Trygge_rom_gen_B), np.std(cis.Trygge_rom_gen_B)

(1.2225130890052356, 0.9379804352698924)


Cis 
Personal mean = 1.43 (.89)
General: mean = 1.22 (.93)
difference of .20


Trans
Personal mean = .96 (1.12)
General: mean = .82 (1.05)
difference of .14

In [5]:
stats.spearmanr(alle_skeive.Skeiv_ID.fillna(0), alle_skeive.Synlighet.fillna(0))

SpearmanrResult(correlation=0.3396155254499248, pvalue=8.536874147372123e-19)

In [6]:
#cis_heterofile["age_weight"] = cis_heterofile.Alder.apply(lambda age: age_weights.loc[age].weight)
#cis_heterofile[["Alder", "age_weight"]]

In [7]:
cis_heterofile.Kjonnsidentitet.value_counts()

K     189
M      76
SU      2
Name: Kjonnsidentitet, dtype: int64

In [8]:
known_population_gender = pd.Series([.495, .515, .0], index=['K', 'M', 'SU'])
known_population_gender.name = 'known_population_gender'

In [9]:
gender_counts = cis_heterofile.Kjonnsidentitet.value_counts()
total = len(cis_heterofile.Kjonnsidentitet.dropna())
gender_fractions = gender_counts / total
gender_fractions.name = 'gender_fraction'
gender_fractions

K     0.707865
M     0.284644
SU    0.007491
Name: gender_fraction, dtype: float64

In [10]:
gender_weights = pd.concat([gender_fractions, known_population_gender], axis=1)

In [11]:
gender_weights["gender_weight"] = gender_weights["known_population_gender"] / gender_weights["gender_fraction"]
gender_weights

Unnamed: 0,gender_fraction,known_population_gender,gender_weight
K,0.707865,0.495,0.699286
M,0.284644,0.515,1.809276
SU,0.007491,0.0,0.0


In [12]:
#cis_heterofile["gender_weight"] = cis_heterofile.Kjonnsidentitet.apply(lambda gender: gender_weights.loc[gender].gender_weight)
#cis_heterofile[["Kjonnsidentitet", "gender_weight"]]

In [13]:
eldre = alle_skeive[alle_skeive.mapped_age > 1]
eldre.Sist_besok.value_counts()

Mindre enn 3 måneder siden    183
3-11 måneder siden             71
1-3 år siden                   37
6 år siden eller mer           26
3-6 år siden                   13
Less than 3 months ago          8
3-11 months ago                 4
Aldri                           3
1-3 years ago                   3
3-6 years ago                   1
Name: Sist_besok, dtype: int64

In [14]:
print("16-29 % som har besøkt siste året")
print((168+74+6+8)/len(s_Age_16_29))
print("Older:")
print((183+71+8+4)/len(eldre))

16-29 % som har besøkt siste året
0.8737201365187713
Older:
0.7621776504297995


In [15]:
one_O.Alder.value_counts()

30-44      128
16-29       96
45-60       45
Over_60     10
Name: Alder, dtype: int64

In [16]:
multiple_O.Alder.value_counts()

16-29      88
30-44      44
45-60      12
Over_60     2
Name: Alder, dtype: int64

In [17]:
alle_skeive.Alder.value_counts()

16-29      293
30-44      246
45-60       86
Over_60     17
Name: Alder, dtype: int64

In [18]:
44/246
88/293

0.3003412969283277

In [19]:
q = "Helt_meg_selv"

sub_groups = [binary_trans,
nonbinary,
questioning_gender,
             ]

tnb_plus;

In [20]:
all_nr = pd.concat(sub_groups).NR
nr_counts = all_nr.value_counts()
doubles = nr_counts[nr_counts > 1]
all_nr[all_nr.isin(doubles.index)]
df.loc[df.NR.isin(doubles.index), ["Cis", "Kjonnsidentitet"]]

Unnamed: 0,Cis,Kjonnsidentitet
23,SU,IB
96,SU,IB
98,SU,IB
143,SU,IB
180,SU,IB
185,SU,IB
187,SU,IB
238,SU,IB
317,SU,IB
377,SU,IB


In [21]:
overlap = tnb_plus.NR.isin(all_nr)
tnb_plus.loc[overlap == False, ["NR", "Cis", "Kjonnsidentitet", q]]


Unnamed: 0,NR,Cis,Kjonnsidentitet,Helt_meg_selv
216,12543235,N,SU,-1.0
248,12548606,N,NONE,1.0
261,12553119,N,NONE,1.0
338,12574096,N,NONE,-1.0
389,12586341,N,NONE,-1.0
423,12629348,N,NONE,-1.0
462,12641332,N,NONE,1.0
467,12651201,N,NONE,0.0
506,12689366,N,SU,


In [30]:
#Attempts at weighting based on age

brunnermunzel_test(s_R.Utrygt_stille_spm.dropna(), eldre.Utrygt_stille_spm.dropna(), ttype=0, alpha=.05)
age_counts = cis_heterofile.Alder.value_counts()
total = len(cis_heterofile.Alder.dropna())
age_fractions = age_counts / total
age_fractions.name = 'age_fraction'
age_fractions

known_population = pd.Series([.19, .202, .202, .237], index=['16-29', '30-44', '45-60', 'Over_60'])
known_population.name = 'known_population'
age_weights = pd.concat([age_fractions, known_population], axis=1)


Brunner-Munzel teststatistikk:-2.46 
p-verdi:0.014993764099184848 
CL effektstørrelse = G1: 0.583 / G2: 0.417  
95% konfidensintervall: ±0.067 (lav = 0.35, høy = 0.484) 
frihetsgrad:153.84524965711566 


In [29]:
age_weights["weight"] = age_weights["known_population"] / age_weights["age_fraction"]
age_weights

Unnamed: 0,age_fraction,known_population,weight
30-44,0.475655,0.202,0.424677
45-60,0.258427,0.202,0.781652
16-29,0.161049,0.19,1.179767
Over_60,0.104869,0.237,2.259964
