No description or website provided.
Pull request Compare This branch is 2 commits ahead of TraMineR-Training:master.
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Failed to load latest commit information.
.gitignore
README.md
assignment.r

README.md

Assignment 8

library(TraMineR) library(TraMineRextras) library(cluster) library(WeightedCluster)

1. Create a formatted state sequence object from the biofam data

# Load biofam data

data(biofam)

# Create state names and labels

biofam.states <- c("P", "L", "M", "LM", "C", "LC", "LMC", "D") biofam.labels <- c("Parent","Left","Married","Left/Married","Child","Left/Child","Left/Married/Child","Divorced")

## Create a fully defined and formatted sequence object

biofam.seq <- seqdef(biofam[, 10:25], states=biofam.states, labels=biofam.labels, weights=biofam$wp00tbgs) head(biofam.seq)

## Build a state-properties matrix for states vs events; not a full matrix

properties <- matrix(c(# left, married, child, divorced 0, 0, 0, 0, # parent 1, 0, 0, 0, # left 0, 1, .5, 0, # marr 1, 1, 0, 0, # left+marr 0, 0, 1, 0, # child 1, 0, 1, 0, # left+child 1, 1, 1, 0, # left+marr+child .5, 1, .5, 1 # divorced ), 8, 4, byrow=TRUE)

sm <- as.matrix(dist(properties)) sm

## Create the OM distance matrix

indel <- .5*max(sm) dOM <- seqdist(biofam.seq, method="OM", indel=indel, sm=sm, full.matrix=FALSE)

2. Create a hierarchical cluster tree object with the Ward and McQUitty methods

## Get weights

weight <- attr(biofam.seq, "weights")

## Create hierarchical cluster trees

ward <- hclust(dOM, method="ward", members=weight) mcquitty <- hclust(dOM, method="mcquitty", members=weight)

3. Cluster tree Dendrograms side-by-side

par(mfrow=c(1,2)) plot(ward, labels=FALSE, main="Ward") plot(mcquitty, labels=FALSE, main="WPGMA")

4. Select and plot the 3-cluster colution from the Ward analysis

c3 <- cutree(ward, k=3) seqIplot(biofam.seq, group=c3, sort="from.end")

c3.labels <- c("Parental", "Left home, no children", "Still at home") c3.factor <- factor(c3, levels=c(1,2,3), labels=c3.labels) seqIplot(biofam.seq, group=c3.factor, sort="from.end")

5. Make a silhouette plot

plot(silhouette(c3, dmatrix=as.matrix(dOM)))

6. Look at unweighted and weighted partition quality measures

summary(sil.c3 <- silhouette(c3, dmatrix=as.matrix(dOM))) wcClusterQuality(dOM, c3, weights=weight)

# Unweighted mean silhouette width=0.3416, weighted=0.3559

7. Use logistic regressions to show how cluster membership is related to sex, birthyear,

and questionnaire language

reg1 <- glm((c3==1) ~ sex + birthyr + plingu02, family="binomial", data=biofam) summary(reg1) exp(reg1$coefficients) reg1.coeff <- as.data.frame(summary(reg1)$coefficients) reg1.coeff <- cbind(reg1.coeff, OR=exp(reg1.coeff[,"Estimate"])) reg1.coeff

# For Parental cluster, OR=1.58 for women, 0.47 for Italian speakers

reg2 <- glm((c3==2) ~ sex + birthyr + plingu02, family="binomial", data=biofam) summary(reg2) exp(reg2$coefficients) reg2.coeff <- as.data.frame(summary(reg2)$coefficients) reg2.coeff <- cbind(reg2.coeff, OR=exp(reg2.coeff[,"Estimate"])) reg2.coeff

# For Left home, no children, OR=1.04 for each year of birth

reg3 <- glm((c3==3) ~ sex + birthyr + plingu02, family="binomial", data=biofam) summary(reg3) exp(reg3$coefficients) reg3.coeff <- as.data.frame(summary(reg3)$coefficients) reg3.coeff <- cbind(reg3.coeff, OR=exp(reg3.coeff[,"Estimate"])) reg3.coeff

# For Still at home, OR=2.88 for Italians, 0.94 for each year of birth, 0.74 for women

8. Quality measure for PAM solutions for k=2,...,20

wardRange <- as.clustrange(ward, diss=dOM, weights=weight, ncluster=20) summary(wardRange, max.rank=2) plot(wardRange, stat=c("ASWw","HG","PBC"))

pamRange <- wcKMedRange(dOM, 2:20, weight=weight) plot(pamRange, stat=c("ASW","HG","PBC"))

# Retain k=6 clusters because it seems parsimonious with diminishing returns with k>6

9. Explore the 6-cluster PAM solution

# Look at the clusters, develop labels seqIplot(biofam.seq, group=pamRange$clustering$cluster6, border=NA)

c6.labels <- c("Delayed Parental", "Married, w parents", "Early parental", "Left, single", "Married", "Still at home") c6.factor <- factor(pamRange$clustering$cluster6, levels=c(1648,1903,1960,1992,248,6), labels=c6.labels) seqIplot(biofam.seq, group=c6.factor, sort="from.end") seqmtplot(biofam.seq, group=c6.factor)