# Preparing the Data

In [1]:
cd

C:\Users\Rafael\Eco 797 - Final Code\Analysis


## March CPS 1964 - 2020

In [2]:
/*I obtain march CPS data (ASEC) for survey years 1964-2020 */

use "..\Data\cps"
describe
summarize

### Addressing missing observations

In [3]:
/*I address missing observations */

replace region=. if region==97
replace race=. if race== 999 // blank 
replace sex=. if sex ==0
replace ahrsworkt=. if ahrsworkt==999 // Missing
replace classwkr = . if inlist(classwkr, 99) // NIU, Missing
replace uhrsworkly = . if uhrsworkly == 999   // NIU, Missing
replace classwly=. if inlist(classwly, 00, 99) & inrange(year, 1964, 1975) // NIU, Missing/Unknown
replace classwly=. if classwly==0 & year>=1976 // ensuring year is not missing
replace wkswork1=. if wkswork1==0 & year>=1976
replace wkswork2 = . if inlist(wkswork2, 0, 9) // NIU, Missing data
replace fullpart = . if inlist(fullpart, 0, 9) // NIU, Unknown
replace educ=. if inlist(educ, 000, 999) // NIU or no schooling, NIU or blank, Missing/Unknown
replace higrade = . if inlist(higrade, 0, 999) // NIU, Missing/Unknown

replace incwage=. if incwage==99999998 & year==1964
replace incwage=. if incwage==99999999 & year>=1965 // N.I.U. (Not in Universe)/ Missing (1962-1966 only)
replace inclongj=. if inclongj==99999999
replace oincwage=. if oincwage==99999999


(218 real changes made, 218 to missing)

(0 real changes made)

(0 real changes made)

(5,238,255 real changes made, 5,238,255 to missing)

(21 real changes made, 21 to missing)

(3,817,871 real changes made, 3,817,871 to missing)

(706,126 real changes made, 706,126 to missing)

(3,817,862 real changes made, 3,817,862 to missing)

(3,817,871 real changes made, 3,817,871 to missing)

(4,523,997 real changes made, 4,523,997 to missing)

(4,524,000 real changes made, 4,524,000 to missing)

(9 real changes made, 9 to missing)

(900,643 real changes made, 900,643 to missing)

(4 real changes made, 4 to missing)

(2,150,737 real changes made, 2,150,737 to missing)

(1,392,818 real changes made, 1,392,818 to missing)

(1,392,818 real changes made, 1,392,818 to missing)


### Full time, full-year workers
#### FTFY, defined as as those who work 35 hours per week, and forty-plus weeks in the prior year.

In [4]:
/* FTFY, defined as as those who work 35-plus hours per week, and forty-plus weeks in the prior year. 

I create a variable for full time workers defined as as those who work 35-plus hours per week in 
the last year. */

gen fulltime = fullpart == 1 
label value fulltime FULLTIME
label variable fulltime "usually worked 35+ hours per week last year"

/* I create a variable for workers who worked forty-plus weeks in the prior year (fullyear). We note
that from 1964 to 1975, weeks worked last year (wkswork1) were not recorded, but were recorde in an 
intervalled manner, in variable (wkswork2) and so we impute these values from wkswork2 1964 to 1975. */ 

tabstat wkswork1 if year>=1976 [w=asecwt], by(wkswork2) statistics(mean) format(%8.2fc) save
return list
  scalar m1=r(Stat1)[1,1]
  scalar m2=r(Stat2)[1,1]
  scalar m3=r(Stat3)[1,1]
  scalar m4=r(Stat4)[1,1]
  scalar m5=r(Stat5)[1,1]
  scalar m6=r(Stat6)[1,1]
  
  replace wkswork1=m1 if wkswork2==1 & year<=1975
  replace wkswork1=m2 if wkswork2==2 & year<=1975
  replace wkswork1=m3 if wkswork2==3 & year<=1975
  replace wkswork1=m4 if wkswork2==4 & year<=1975
  replace wkswork1=m5 if wkswork2==5 & year<=1975
  replace wkswork1=m6 if wkswork2==6 & year<=1975


  tab year wkswork1 if year<=1975
  
  scalar drop _all

gen fullyear = wkswork1 >= 40 & wkswork1 <= 52  
label value fullyear FULYEAR
label variable fullyear  "workers~40-plus weeks worked"






(analytic weights assumed)

Summary for variables: wkswork1
     by categories of: wkswork2 (weeks worked last year, intervalled)

    wkswork2 |      mean
-------------+----------
  1-13 weeks |      7.99
 14-26 weeks |     21.56
 27-39 weeks |     33.46
 40-47 weeks |     42.50
 48-49 weeks |     48.25
 50-52 weeks |     51.93
-------------+----------
       Total |     45.24
------------------------


macros:
              r(name6) : "50-52 weeks"
              r(name5) : "48-49 weeks"
              r(name4) : "40-47 weeks"
              r(name3) : "27-39 weeks"
              r(name2) : "14-26 weeks"
              r(name1) : "1-13 weeks"

matrices:
              r(Stat6) :  1 x 1
              r(Stat5) :  1 x 1
              r(Stat4) :  1 x 1
              r(Stat3) :  1 x 1
              r(Stat2) :  1 x 1
              r(Stat1) :  1 x 1
          r(StatTotal) :  1 x 1







variable wkswork1 was byte now float
(83,277 real changes made)

(65,850 real changes made)

(54,336 real

###  Drop armed forces and children,  topcoded age

In [5]:
/* Individuals in the armed forces are dropped, I keep individuals who are at least 
17 years of age (during the earning year they were 16) and worked 1 to 52 weeks */

keep if popstat==1 & wkswork1 >= 1 & wkswork1 <= 52 & age >=17 

/* age is top coded at 90 or 99 through the years, I create a consistent measure */

replace age=90 if age >90 




(4,642,365 observations deleted)

(107 real changes made)


### Generate Region variables, Sex and Race dummies

In [6]:
/* We create region dummies */ 

gen northeast = region == 11 | region ==12
gen midwest = region == 21 | region == 22
gen south = region == 31 | region == 32 | region == 33
gen west = region == 41 | region == 42 


/* Race dummies */

generate black = (race ==200)
label value black BLACK
label variable black "black race"


generate white = (race ==100)
label value white WHITE
label variable white "white race"

generate other_race = race >=300
label value other_race OTHER_RACE
label variable other_race "other race"


/* Sex dummies */
generate female = (sex==2)
label value female FEMALE
label variable female "female"



#### Before we select workers from 0 to 39 years of experience, we recode for education. 

### Recoding Education

We first recode the education variable educ to create the variable grade, which measures highest grade completed. We use Park (1994) to obtain our experience variable.

In [7]:
/* Recode grade variable */
*drop if educ == .
generate grade=.
*** 1964-1991
  replace grade=0  if educ==2 & year<=1991 // none or preschool
  replace grade=1  if educ==11 & year<=1991 // grade 1
  replace grade=2  if educ==12 & year<=1991 // grade 2
  replace grade=3  if educ==13 & year<=1991 // grade 3
  replace grade=4  if educ==14 & year<=1991 // grade 4
  replace grade=5  if educ==21 & year<=1991 // grade 5
  replace grade=6  if educ==22 & year<=1991 // grade 6
  replace grade=7  if educ==31 & year<=1991 // grade 7
  replace grade=8  if educ==32 & year<=1991 // grade 8 
  replace grade=9  if educ==40 & year<=1991 // grade 9
  replace grade=10 if educ==50 & year<=1991 // grade 10
  replace grade=11 if educ==60 & year<=1991 // grade 11
  replace grade=12 if (educ==72 | educ==73) & year<=1991 // 12th grade, diploma unclear, high school diploma
  replace grade=13 if educ==80 & year<=1991 // 1 year of college
  replace grade=14 if educ==90 & year<=1991 // 2 years of college
  replace grade=15 if educ==100 & year<=1991 // 3 years of college
  replace grade=16 if educ==110 & year<=1991 // 4 years of college
  replace grade=17 if educ==121 & year<=1991 // 5 years of college
  replace grade=18 if educ==122 & year<=1991 // 6 years of college or more

** We have to add (Park 1994) values to calculate experience

* men, white
replace grade = .32  if (white==1 & female==0 & educ==1 | educ == 2) & year>=1992
replace grade = 3.19 if (white==1 & female==0 & educ ==10) & year>=1992
replace grade = 7.24 if (white==1 & female==0 & (educ ==20 | educ==30)) & year>=1992
replace grade = 8.97 if (white==1 & female==0 & educ == 40) & year>=1992
replace grade = 9.92 if (white==1 & female==0 & educ == 50) & year>=1992
replace grade = 10.86 if (white==1 & female==0 & educ ==60) & year>=1992
replace grade = 11.58 if (white==1 & female==0 & educ ==71) & year>=1992
replace grade = 11.99 if (white==1 & female==0 & educ ==73) & year>=1992
replace grade = 13.48 if (white==1 & female==0 & educ ==81) & year>=1992
replace grade = 14.23 if (white==1 & female==0 & (educ ==91 | educ==92)) & year>=1992
replace grade = 16.17 if (white==1 & female==0 & educ ==111) & year>=1992
replace grade = 17.68 if (white==1 & female==0 & educ ==123) & year>=1992
replace grade = 17.71 if (white==1 & female==0 & educ ==124) & year>=1992
replace grade = 17.83 if (white==1 & female==0 & educ ==125) & year>=1992


* female, white
replace grade = 0.62 if (white==1 & female==1 & educ==1 | educ == 2) & year>=1992
replace grade = 3.15 if (white==1 & female==1 & educ ==10) & year>=1992
replace grade = 7.23 if (white==1 & female==1 & (educ ==20 | educ==30)) & year>=1992
replace grade = 8.99 if (white==1 & female==1 & educ == 40) & year>=1992
replace grade = 9.95 if (white==1 & female==1 & educ == 50) & year>=1992
replace grade = 10.87 if (white==1 & female==1 & educ ==60) & year>=1992
replace grade = 11.73 if (white==1 & female==1 & educ ==71) & year>=1992
replace grade = 12.00 if (white==1 & female==1 & educ ==73) & year>=1992
replace grade = 13.35 if (white==1 & female==1 & educ ==81) & year>=1992
replace grade = 14.22 if (white==1 & female==1 & (educ ==91 | educ==92)) & year>=1992
replace grade = 16.15 if (white==1 & female==1 & educ ==111) & year>=1992
replace grade = 17.64 if (white==1 & female==1 & educ ==123) & year>=1992
replace grade = 17.00 if (white==1 & female==1 & educ ==124) & year>=1992
replace grade = 17.76 if (white==1 & female==1 & educ ==125) & year>=1992

* men, black
replace grade = .92  if (black==1 & female==0  & educ==1 | educ == 2) & year>=1992
replace grade = 3.28 if (black==1 & female==0 & educ ==10) & year>=1992
replace grade = 7.04 if (black==1 & female==0 & (educ ==20 | educ==30)) & year>=1992
replace grade = 9.02 if (black==1 & female==0 & educ == 40) & year>=1992
replace grade = 9.91 if (black==1 & female==0 & educ == 50) & year>=1992
replace grade = 10.90 if (black==1 & female==0 & educ ==60) & year>=1992
replace grade = 11.41 if (black==1 & female==0 & educ ==71) & year>=1992
replace grade = 11.98 if (black==1 & female==0 & educ ==73) & year>=1992
replace grade = 13.57 if (black==1 & female==0 & educ ==81) & year>=1992
replace grade = 14.33 if (black==1 & female==0 & (educ ==91 | educ==92)) & year>=1992
replace grade = 16.13 if (black==1 & female==0 & educ ==111) & year>=1992
replace grade = 17.51 if (black==1 & female==0 & educ ==123) & year>=1992
replace grade = 17.83 if (black==1 & female==0 & educ ==124) & year>=1992
replace grade = 18.00 if (black==1 & female==0 & educ ==125) & year>=1992

* female, black
replace grade = 0.00 if (black==1 & female==1 & educ==1 | educ == 2) & year>=1992
replace grade = 2.90 if (black==1 & female==1 & educ ==10) & year>=1992
replace grade = 7.03 if (black==1 & female==1 & (educ ==20 | educ==30)) & year>=1992
replace grade = 9.05 if (black==1 & female==1 & educ == 40) & year>=1992
replace grade = 9.99 if (black==1 & female==1 & educ == 50) & year>=1992
replace grade = 10.85 if (black==1 & female==1 & educ ==60) & year>=1992
replace grade = 11.64 if (black==1 & female==1 & educ ==71) & year>=1992
replace grade = 12.00 if (black==1 & female==1 & educ ==73) & year>=1992
replace grade = 13.43 if (black==1 & female==1 & educ ==81) & year>=1992
replace grade = 14.33 if (black==1 & female==1 & (educ ==91 | educ==92)) & year>=1992
replace grade = 16.04 if (black==1 & female==1 & educ ==111) & year>=1992
replace grade = 17.69 if (black==1 & female==1 & educ ==123) & year>=1992
replace grade = 17.40 if (black==1 & female==1 & educ ==124) & year>=1992
replace grade = 18.00 if (black==1 & female==1 & educ ==125) & year>=1992

* men, other
replace grade = .62  if (other_race==1 & female==0 & educ==1 | educ == 2) & year>=1992
replace grade = 3.24 if (other_race==1 & female==0 & educ ==10) & year>=1992
replace grade = 7.14 if (other_race==1 & female==0 & (educ ==20 | educ==30)) & year>=1992
replace grade = 9.00 if (other_race==1 & female==0 & educ == 40) & year>=1992
replace grade = 9.92 if (other_race==1 & female==0 & educ == 50) & year>=1992
replace grade = 10.88 if (other_race==1 & female==0 & educ ==60) & year>=1992
replace grade = 11.50 if (other_race==1 & female==0 & educ ==71) & year>=1992
replace grade = 11.99 if (other_race==1 & female==0 & educ ==73) & year>=1992
replace grade = 13.53 if (other_race==1 & female==0 & educ ==81) & year>=1992
replace grade = 14.28 if (other_race==1 & female==0 & (educ ==91 | educ==92)) & year>=1992
replace grade = 16.15 if (other_race==1 & female==0 & educ ==111) & year>=1992
replace grade = 17.60 if (other_race==1 & female==0 & educ ==123) & year>=1992
replace grade = 17.77 if (other_race==1 & female==0 & educ ==124) & year>=1992
replace grade = 17.92 if (other_race==1 & female==0 & educ ==125) & year>=1992

* female, other
replace grade = 0.31 if (other_race==1 & female==1 & educ==1 | educ == 2) & year>=1992
replace grade = 3.03 if (other_race==1 & female==1 & educ ==10) & year>=1992
replace grade = 7.13 if (other_race==1 & female==1 & (educ ==20 | educ==30)) & year>=1992
replace grade = 9.02 if (other_race==1 & female==1 & educ == 40) & year>=1992
replace grade = 9.97 if (other_race==1 & female==1 & educ == 50) & year>=1992
replace grade = 10.86 if (other_race==1 & female==1 & educ ==60) & year>=1992
replace grade = 11.69 if (other_race==1 & female==1 & educ ==71) & year>=1992
replace grade = 12.00 if (other_race==1 & female==1 & educ ==73) & year>=1992
replace grade = 13.47 if (other_race==1 & female==1 & educ ==81) & year>=1992
replace grade = 14.28 if (other_race==1 & female==1 & (educ ==91 | educ==92)) & year>=1992
replace grade = 16.10 if (other_race==1 & female==1 & educ ==111) & year>=1992
replace grade = 17.67 if (other_race==1 & female==1 & educ ==123) & year>=1992
replace grade = 17.20 if (other_race==1 & female==1 & educ ==124) & year>=1992
replace grade = 17.88 if (other_race==1 & female==1 & educ ==125) & year>=1992

** We label grade
  label value grade GRADE
  label variable grade  "highest grade completed"


(4,611,418 missing values generated)

(7,040 real changes made)

(2,315 real changes made)

(5,233 real changes made)

(8,982 real changes made)

(11,662 real changes made)

(14,462 real changes made)

(28,717 real changes made)

(33,300 real changes made)

(109,270 real changes made)

(74,126 real changes made)

(111,630 real changes made)

(114,818 real changes made)

(762,708 real changes made)

(131,233 real changes made)

(148,788 real changes made)

(60,725 real changes made)

(196,132 real changes made)

(42,019 real changes made)

(93,327 real changes made)

(5,317 real changes made)

(8,837 real changes made)

(40,960 real changes made)

(20,236 real changes made)

(28,931 real changes made)

(39,838 real changes made)

(16,359 real changes made)

(349,960 real changes made)

(211,207 real changes made)

(95,897 real changes made)

(217,660 real changes made)

(75,702 real changes made)

(23,084 real changes made)

(19,932 real changes made)

(5,317 real changes made)

(4,404

### Generate  experience variable 

In [8]:
/*Generate experience variable */ 

gen experience = max(age-grade-7,0) if year <= 1991
replace experience = max(min(age-grade-7,age-17),0) if year >=1992
label value experience EXPERIENCE
label variable experience "experience"



(2,654,922 missing values generated)

(2,654,922 real changes made)




### Genearte Educational categories

In [9]:
/* Generate educational categories according to AKK 2008 5 schooling categories */

drop if educ ==.
generate schooling1 = educ <= 60 if year <=1991 // high school dropout
generate schooling2 = educ == 72 if year <=1991 // high school diploma
generate schooling3 = (educ >= 80 & educ <= 100) | educ ==73 if year <=1991  // some college 
generate schooling4 =  (educ==110 | educ==121) if year <=1991 // college graduate
generate schooling5 =  educ >= 122 if year <=1991 // post-college

replace schooling1 =  educ <= 60 if year >=1992 // high school dropout
replace schooling2 =  (educ==71 | educ==73) if year >=1992 // high school diploma
replace schooling3 =  (educ >= 81 & educ <= 92) if year >=1992  // some college 
replace schooling4 =  educ == 111 if year >=1992 // college graduate 
replace schooling5 =  educ >=123 if year >=1992 // post-college

assert schooling1 + schooling2 + schooling3 + schooling4 + schooling5 == 1

gen schooling = schooling1 + 2*schooling2 + 3*schooling3 + 4*schooling4 + 5*schooling5
label define schooling 1 "HS Dropout" 2 "HS Diploma" 3 "Some College" 4 "College Grad" 5 "College-Plus"
label value schooling SCHOOLING
assert schooling > 0 

tab schooling 


(9 observations deleted)

(2,654,922 missing values generated)

(2,654,922 missing values generated)

(2,654,922 missing values generated)

(2,654,922 missing values generated)

(2,654,922 missing values generated)

(2,654,922 real changes made)

(2,654,922 real changes made)

(2,654,922 real changes made)

(2,654,922 real changes made)

(2,654,922 real changes made)







  schooling |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    800,161       17.35       17.35
          2 |  1,503,748       32.61       49.96
          3 |  1,185,730       25.71       75.67
          4 |    754,034       16.35       92.03
          5 |    367,736        7.97      100.00
------------+-----------------------------------
      Total |  4,611,409      100.00


### Top code hours

In [10]:
/* Top code hours */

replace ahrsworkt = 0 if ahrsworkt == 99 & inrange(year, 1964, 1967)
replace ahrsworkt = 98 if ahrsworkt == 99 & inrange(year, 1968, 2020)
replace uhrsworkly = 98 if uhrsworkly == 99 & inrange(year, 1968, 2020)



(0 real changes made)

(9,890 real changes made)

(6,963 real changes made)


### Impute missing hours for ahrsworkt 

In [11]:
/*Exclude negative weights. */
keep if asecwt >=0 & asecwt~=.


/* Impute missing hours for ahrswort from 1964 to 1975, based on sex, fulltime/partime statu, 
According to Katz and Murphy (1992) */

egen sex_fullpart_year = group(female fullpart year) if year <=1975, label


tabstat ahrsworkt if year <= 1975 [w=asecwt], by (sex_fullpart_year) statistics(mean) format(%8.2fc) save
return list
scalar m1=r(Stat1)[1,1]
scalar m2=r(Stat2)[1,1]
scalar m3=r(Stat3)[1,1]
scalar m4=r(Stat4)[1,1]
scalar m5=r(Stat5)[1,1]
scalar m6=r(Stat6)[1,1]
scalar m7=r(Stat7)[1,1]
scalar m8=r(Stat8)[1,1]
scalar m9=r(Stat9)[1,1]
scalar m10=r(Stat10)[1,1]
scalar m11=r(Stat11)[1,1]
scalar m12=r(Stat12)[1,1]
scalar m13=r(Stat13)[1,1]
scalar m14=r(Stat14)[1,1]
scalar m15=r(Stat15)[1,1]
scalar m16=r(Stat16)[1,1]
scalar m17=r(Stat17)[1,1]
scalar m18=r(Stat18)[1,1]
scalar m19=r(Stat19)[1,1]
scalar m20=r(Stat20)[1,1]
scalar m21=r(Stat21)[1,1]
scalar m22=r(Stat22)[1,1]
scalar m23=r(Stat23)[1,1]
scalar m24=r(Stat24)[1,1]
scalar m25=r(Stat25)[1,1]
scalar m26=r(Stat26)[1,1]
scalar m27=r(Stat27)[1,1]
scalar m28=r(Stat28)[1,1]
scalar m29=r(Stat29)[1,1]
scalar m30=r(Stat30)[1,1]
scalar m31=r(Stat31)[1,1]
scalar m32=r(Stat32)[1,1]
scalar m33=r(Stat33)[1,1]
scalar m34=r(Stat34)[1,1]
scalar m35=r(Stat35)[1,1]
scalar m36=r(Stat36)[1,1]
scalar m37=r(Stat37)[1,1]
scalar m38=r(Stat38)[1,1]
scalar m39=r(Stat39)[1,1]
scalar m40=r(Stat40)[1,1]
scalar m41=r(Stat41)[1,1]
scalar m42=r(Stat42)[1,1]
scalar m43=r(Stat43)[1,1]
scalar m44=r(Stat44)[1,1]
scalar m45=r(Stat45)[1,1]
scalar m46=r(Stat46)[1,1]
scalar m47=r(Stat47)[1,1]
scalar m48=r(Stat48)[1,1]

*** Males-Fulltime
replace ahrsworkt=m1 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1964
replace ahrsworkt=m2 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1965
replace ahrsworkt=m3 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1966
replace ahrsworkt=m4 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1967
replace ahrsworkt=m5 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1968
replace ahrsworkt=m6 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1969
replace ahrsworkt=m7 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1970
replace ahrsworkt=m8 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1971
replace ahrsworkt=m9 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1972
replace ahrsworkt=m10 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1973
replace ahrsworkt=m11 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1974
replace ahrsworkt=m12 if ahrsworkt == . & female==0 & fullpart == 1 & year ==1975

*** Males-Partime
replace ahrsworkt=m13 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1964
replace ahrsworkt=m14 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1965
replace ahrsworkt=m15 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1966
replace ahrsworkt=m16 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1967
replace ahrsworkt=m17 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1968
replace ahrsworkt=m18 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1969
replace ahrsworkt=m19 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1970
replace ahrsworkt=m20 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1971
replace ahrsworkt=m21 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1972
replace ahrsworkt=m22 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1973
replace ahrsworkt=m23 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1974
replace ahrsworkt=m24 if ahrsworkt == . & female==0 & fullpart == 2 & year ==1975

*** Females-Fulltime
replace ahrsworkt=m25 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1964
replace ahrsworkt=m26 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1965
replace ahrsworkt=m27 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1966
replace ahrsworkt=m28 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1967
replace ahrsworkt=m29 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1968
replace ahrsworkt=m30 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1969
replace ahrsworkt=m31 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1970
replace ahrsworkt=m32 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1971
replace ahrsworkt=m33 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1972
replace ahrsworkt=m34 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1973
replace ahrsworkt=m35 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1974
replace ahrsworkt=m36 if ahrsworkt == . & female==1 & fullpart == 1 & year ==1975

*** Females-Partime
replace ahrsworkt=m37 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1964
replace ahrsworkt=m38 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1965
replace ahrsworkt=m39 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1966
replace ahrsworkt=m40 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1967
replace ahrsworkt=m41 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1968
replace ahrsworkt=m42 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1969
replace ahrsworkt=m43 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1970
replace ahrsworkt=m44 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1971
replace ahrsworkt=m45 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1972
replace ahrsworkt=m46 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1973
replace ahrsworkt=m47 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1974
replace ahrsworkt=m48 if ahrsworkt == . & female==1 & fullpart == 2 & year ==1975



(348 observations deleted)

(3,927,490 missing values generated)

(analytic weights assumed)

Summary for variables: ahrsworkt
     by categories of: sex_fullpart_year (group(female fullpart year))

sex_fullpart_year |      mean
------------------+----------
 0 full-time 1964 |     45.03
 0 full-time 1965 |     45.16
 0 full-time 1966 |     45.41
 0 full-time 1967 |     44.96
 0 full-time 1968 |     44.80
 0 full-time 1969 |     44.67
 0 full-time 1970 |     44.59
 0 full-time 1971 |     44.30
 0 full-time 1972 |     44.54
 0 full-time 1973 |     44.53
 0 full-time 1974 |     44.20
 0 full-time 1975 |     43.46
 0 part-time 1964 |     24.90
 0 part-time 1965 |     23.55
 0 part-time 1966 |     25.35
 0 part-time 1967 |     25.38
 0 part-time 1968 |     25.50
 0 part-time 1969 |     24.36
 0 part-time 1970 |     22.25
 0 part-time 1971 |     22.90
 0 part-time 1972 |     23.78
 0 part-time 1973 |     24.03
 0 part-time 1974 |     24.44
 0 part-time 1975 |     23.03
 1 full-time 1964 | 

### Generate hours worked per week last year

In [12]:
/* Generate hours worked per week last year */

generate weekly_hours_workedly = ahrsworkt if inrange(year, 1964, 1975)
replace weekly_hours_workedly = uhrsworkly if inrange(year, 1976, 2020)
label value weekly_hours_workedly HOURSWLY
label variable weekly_hours_workedly "usual hours worked per week last year "


(3,927,490 missing values generated)

(3,927,490 real changes made)




### Workers whose class of work in their longest job was private or government wage/salary employment

In [13]:
/* Workers whose class of work last year was private or government 
wage/salary employment. */ 

generate wageworker = classwly ==22 | classwly == 24 if inrange(year, 1964, 1975)
replace wageworker = classwly >= 22 & classwly <= 28 if inrange(year, 1976,2020)

generate selfemployed = classwly == 10 if inrange(year, 1964, 1975) 
replace selfemployed = classwly >=13 & classwly <= 14 if inrange(year, 1976,2020)

label value wageworker WAGEWORKER
label variable wageworker "class of work was private or gov't wage"

label value selfemployed SELFEMP
label variable selfemployed "class of work was selfemployed wage"

keep if wageworker==1 | selfemployed==1



(3,927,490 missing values generated)

(3,927,490 real changes made)

(3,927,490 missing values generated)

(3,927,490 real changes made)





(25,719 observations deleted)


### Full time, full-year workers
#### FTFY, defined as as those who work 35 hours per week, and forty-plus weeks in the prior year.


### Excluding allocated earnings 

In [14]:
/* We exclude allocated earnings */ 

replace qinclong = inlist(qinclong, 1, 3)
gen allocated_earnings = 0 
replace allocated_earnings = 1 if qincwage == 1 & inrange(year, 1968, 1975)
replace allocated_earnings = 1 if qincwage >= 1 & qincwage <= 3 & inrange(year, 1976, 1987)
replace allocated_earnings = 1 if qinclong == 1 & inrange(year, 1988, 2020) & srcearn == 1
replace allocated_earnings = 1 if qoincwage == 1  & inrange(year, 1988, 2020) & srcearn == 1 
tab allocated_earnings
label variable allocated_earnings "allocated_earnings"



(1,628,182 real changes made)


(64,981 real changes made)

(153,862 real changes made)

(467,980 real changes made)

(9,886 real changes made)


allocated_e |
    arnings |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |  3,888,633       84.81       84.81
          1 |    696,709       15.19      100.00
------------+-----------------------------------
      Total |  4,585,342      100.00



### Top coding wages

In [15]:
/* We top code wage */

gen topcode_incwage = .
replace topcode_incwage = 0 if incwage != . & inrange(year, 1964,1987)
replace topcode_incwage = 1 if incwage == 99999 & inrange(year, 1964, 1975)
replace topcode_incwage = 1 if incwage == 50000 & inrange(year, 1976, 1981)
replace topcode_incwage = 1 if incwage == 75000 & inrange(year, 1982, 1984)
replace topcode_incwage = 1 if incwage == 99999 & inrange(year, 1985, 1987)
tab year topcode_incwage, missing

gen topcode_inclongj = .
replace topcode_inclongj = 0 if inclongj !=. & inrange(year, 1988,2020)
replace topcode_inclongj = 1 if inclongj >=99999  & inrange(year, 1988, 1995)
replace topcode_inclongj = 1 if inclongj >=150000 & inrange(year, 1996, 2002)
replace topcode_inclongj=1 if inclongj >=200000 & inrange(year, 2003, 2010)
replace topcode_inclongj=1 if inclongj >=250000 & inrange(year, 2011, 2014)
replace topcode_inclongj=1 if inclongj >=280000 & year==2015
replace topcode_inclongj=1 if inclongj >=300000 & inrange(year, 2016, 2018)
replace topcode_inclongj=1 if inclongj >=310000 & year==2019
replace topcode_inclongj=1 if inclongj >=360000 & year==2020
tab year topcode_inclongj, missing

gen topcode_oincwage = .
replace topcode_oincwage = 0 if oincwage !=. & inrange(year, 1988,2020)
replace topcode_oincwage = 1 if oincwage >=99999  & inrange(year, 1988, 1995)
replace topcode_oincwage = 1 if oincwage >=25000  & inrange(year, 1996, 2002)
replace topcode_oincwage = 1 if oincwage>=35000 & inrange(year, 2003, 2010)
replace topcode_oincwage = 1 if oincwage>=47000 & inrange(year, 2011, 2014)
replace topcode_oincwage = 1 if oincwage>=56000 & year==2015
replace topcode_oincwage = 1 if oincwage>=55000 & inrange(year, 2016, 2017)
replace topcode_oincwage = 1 if oincwage>=56000 & year==2018
replace topcode_oincwage = 1 if oincwage>=60000 & year==2019
replace topcode_oincwage = 1 if oincwage>=70000 & year==2020
tab year topcode_inclongj, missing

generate topcode= topcode_incwage if year<=1987
replace  topcode=1 if (topcode_inclongj==1 | topcode_oincwage==1) & year>=1988
replace  topcode=0 if (topcode_inclongj==0 & topcode_oincwage==0) & year>=1988
label variable topcode "Top Coded Values of Wages" // To rename variable labels
label define TOPCODE 0 "0-Not Top Coded" 1 "1-Top Coded" // To rename labels, so it's not just 1 and 0 
label value topcode TOPCODE
tab year topcode, missing 



(4,585,342 missing values generated)

(1,614,373 real changes made)

(0 real changes made)

(3,112 real changes made)

(1,221 real changes made)

(984 real changes made)


    survey |         topcode_incwage
      year |         0          1          . |     Total
-----------+---------------------------------+----------
      1964 |    30,089          0          2 |    30,091 
      1965 |    30,338          0          0 |    30,338 
      1966 |    64,964          0          0 |    64,964 
      1967 |    41,336          0          0 |    41,336 
      1968 |    65,349          0          0 |    65,349 
      1969 |    66,778          0          0 |    66,778 
      1970 |    63,565          0          0 |    63,565 
      1971 |    64,390          0          0 |    64,390 
      1972 |    61,763          0          0 |    61,763 
      1973 |    60,911          0          0 |    60,911 
      1974 |    60,867          0          0 |    60,867 
      1975 |    59,852          0     

      1967 |         0          0     41,336 |    41,336 
      1968 |         0          0     65,349 |    65,349 
      1969 |         0          0     66,778 |    66,778 
      1970 |         0          0     63,565 |    63,565 
      1971 |         0          0     64,390 |    64,390 
      1972 |         0          0     61,763 |    61,763 
      1973 |         0          0     60,911 |    60,911 
      1974 |         0          0     60,867 |    60,867 
      1975 |         0          0     59,852 |    59,852 
      1976 |         0          0     61,484 |    61,484 
      1977 |         0          0     74,809 |    74,809 
      1978 |         0          0     73,947 |    73,947 
      1979 |         0          0     75,105 |    75,105 
      1980 |         0          0     90,041 |    90,041 
      1981 |         0          0     89,829 |    89,829 
      1982 |         0          0     80,395 |    80,395 
      1983 |         0          0     79,616 |    79,616 
      1984 |  

In [16]:
/* Replace wage with 1.5* the top coded threshold if its topcoded */

replace incwage = 99999*1.5 if topcode_incwage == 1 & inrange(year, 1962, 1975) 
replace incwage = 50000*1.5 if topcode_incwage == 1 & inrange(year, 1976, 1981)
replace incwage = 75000*1.5 if topcode_incwage == 1 & inrange(year, 1982, 1984) 
replace incwage = 99999*1.5 if topcode_incwage == 1 & inrange(year, 1985, 1987)

replace inclongj = 99999* 1.5 if topcode_inclongj == 1 & inrange(year, 1988, 1995)
replace inclongj = 150000* 1.5 if topcode_inclongj == 1 & inrange(year, 1996, 2002)
replace inclongj = 200000* 1.5 if topcode_inclongj == 1 & inrange(year, 2003, 2010)
replace inclongj = 250000* 1.5 if topcode_inclongj == 1 & inrange(year, 2011, 2014)
replace inclongj = 280000* 1.5 if topcode_inclongj == 1 & year==2015
replace inclongj = 300000* 1.5 if topcode_inclongj == 1 & inrange(year, 2016, 2018)
replace inclongj = 310000* 1.5 if topcode_inclongj == 1 & year==2019
replace inclongj = 360000* 1.5 if topcode_inclongj == 1 & year==2020

replace oincwage = 99999* 1.5 if topcode_oincwage == 1 & inrange(year, 1988, 1995)
replace oincwage = 25000* 1.5 if topcode_oincwage == 1 & inrange(year, 1996, 2002)
replace oincwage = 35000* 1.5 if topcode_oincwage == 1 & inrange(year, 2003, 2010)
replace oincwage = 47000* 1.5 if topcode_oincwage == 1 & inrange(year, 2011, 2014)
replace oincwage = 56000* 1.5 if topcode_oincwage == 1 & year==2015
replace oincwage = 55000* 1.5 if topcode_oincwage == 1 & inrange(year, 2016, 2017)
replace oincwage = 56000* 1.5 if topcode_oincwage == 1 & year==2018
replace oincwage = 60000* 1.5 if topcode_oincwage == 1 & year==2019
replace oincwage = 70000* 1.5 if topcode_oincwage == 1 & year==2020

replace incwage= inclongj + oincwage if topcode_inclongj == 1 | topcode_oincwage == 1


/* We create top codes for weekly wages */

gen top_code_wwage = 0
replace top_code_wwage=1 if (incwage/wkswork1)>(99999*1.5/40) & inrange(year, 1964, 1975)
replace top_code_wwage=1 if (incwage/wkswork1)>(50000*1.5/40) & inrange(year, 1976, 1981)
replace top_code_wwage=1 if (incwage/wkswork1)>(75000*1.5/40) & inrange(year, 1982, 1984)
replace top_code_wwage=1 if (incwage/wkswork1)>(99999*1.5/40) & inrange(year, 1985, 1987)
replace top_code_wwage=1 if (incwage/wkswork1)>((99999+99999)*1.5/40) & inrange(year, 1988, 1995)
replace top_code_wwage=1 if (incwage/wkswork1)>((150000+25000)*1.5/40) & inrange(year, 1996, 2002)
replace top_code_wwage=1 if (incwage/wkswork1)>((200000+35000)*1.5/40) & inrange(year, 2003, 2010)
replace top_code_wwage=1 if (incwage/wkswork1)>((250000+47000)*1.5/40) & inrange(year, 2011, 2014)
replace top_code_wwage=1 if (incwage/wkswork1)>((280000+56000)*1.5/40) & year==2015
replace top_code_wwage=1 if (incwage/wkswork1)>((300000+55000)*1.5/40) & year>=2016 & year<=2017
replace top_code_wwage=1 if (incwage/wkswork1)>((300000+56000)*1.5/40) & year==2018
replace top_code_wwage=1 if (incwage/wkswork1)>((310000+60000)*1.5/40) & year==2019
replace top_code_wwage=1 if (incwage/wkswork1)>((360000+70000)*1.5/40) & year==2020

label variable top_code_wwage "Top Coded Values of Weekly Wages" // To rename variable labels
label define WWTOPCODE 0 "0-Not Top Coded" 1 "1-Top Coded" // To rename labels, so it's not just 1 and 0 
label value topcode WWTOPCODE
tab year top_code_wwage, missing 



(0 real changes made)

(3,112 real changes made)

(1,221 real changes made)

variable incwage was long now double
(984 real changes made)

variable inclongj was long now double
(7,312 real changes made)

(6,521 real changes made)

(9,454 real changes made)

(3,677 real changes made)

(791 real changes made)

(2,331 real changes made)

(701 real changes made)

(631 real changes made)

variable oincwage was long now double
(61 real changes made)

(4,437 real changes made)

(4,169 real changes made)

(1,215 real changes made)

(280 real changes made)

(550 real changes made)

(260 real changes made)

(246 real changes made)

(227 real changes made)

(42,304 real changes made)


(9 real changes made)

(241 real changes made)

(73 real changes made)

(64 real changes made)

(103 real changes made)

(312 real changes made)

(445 real changes made)

(151 real changes made)

(36 real changes made)

(86 real changes made)

(35 real changes made)

(29 real changes made)

(31 real changes made)


### Import PCE data

In [17]:
/* Import PCE data */

replace year = year-1
sort year
merge m:1 year using "../Data/pce", keep(match) nogenerate
replace year = year + 1 
describe
summarize


(4,585,342 real changes made)



    Result                           # of obs.
    -----------------------------------------
    not matched                             0
    matched                         4,585,342  
    -----------------------------------------

(4,585,342 real changes made)


Contains data from ..\Data\cps.dta
  obs:     4,585,342                          
 vars:            82                          23 JUL 2021 03:54
--------------------------------------------------------------------------------
              storage   display    value
variable name   type    format     label      variable label
--------------------------------------------------------------------------------
year            int     %8.0g                 survey year
serial          long    %12.0g                household serial number
month           byte    %8.0g      MONTH      month
cpsid           double  %12.0g                cpsid, household record
asecflag        byte    %8.0g      ASECF

Sorted by: 
     Note: Dataset has changed since last saved.


    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
        year |  4,585,342     1994.89    15.74444       1964       2020
      serial |  4,585,342    39386.42    25552.66          1      99986
       month |  4,585,342           3           0          3          3
       cpsid |  3,840,329    1.59e+13    8.08e+12          0   2.02e+13
    asecflag |  3,915,138           1           0          1          1
-------------+---------------------------------------------------------
       hflag |     97,472    .3019636    .4591119          0          1
     asecwth |  4,585,342    1582.467    972.7604          0   28722.98
      region |  4,585,222    27.38152    10.78493         11         42
    statefip |  4,585,342    30.45046    18.66261          1         99
 marbasecidh |  3,840,329    9.23e+08    4.56e+08     168591   1.20e+09
-

In [18]:
/* We create weekly wages */

gen weekly_wage = incwage/wkswork1 if wageworker == 1 & incwage>0 
label value weekly_wage WEEKLY_WAGE
label variable weekly_wage "weekly wage of wage and salary workers"



(470,628 missing values generated)




In [19]:
/*generate deflated wages */

summarize pce if inlist(year, 2001), meanonly // 2001 survey year = 2000 pce year
scalar pce2000=r(mean)
scalar list pce2000

summarize pce if inlist(year, 1983), meanonly // 1983 survey year = 1982 pce year
scalar pce1982=r(mean)
scalar list pce1982


generate real_weekly_wage = ((pce2000*weekly_wage)/pce) // deflated to 2000 year dollars
label variable real_weekly_wage "real weekly wage in 2000$, using PCE deflator"

/* Generate bottom weekley wages in 1982 dollars */

generate weekly_wage_less_than_sixtyseven = ((pce2000*weekly_wage)/pce) < ((67*78.235)/47.456) 
label variable weekly_wage_less_than_sixtyseven "bottom weekly wages in 1982$, using PCE deflator"






   pce2000 =     78.235



   pce1982 =     47.456

(470,628 missing values generated)




In [20]:
/* Keep workers ages 16 to 64 */

keep if age>=17 & age<=65 

/* We drop 3/8 file in order to better compare income estimates from ASEC 2014 and prior */ 

drop if hflag == 1


(168,378 observations deleted)

(27,945 observations deleted)


In [21]:
save "../Data/cleaned_cps", replace

(note: file ../Data/cleaned_cps.dta not found)
file ../Data/cleaned_cps.dta saved
