In [1]:
data demographics;
    infle 'mydata.txt';
    input Gender $ Age Height Weight;
run;

title "Listing of data set DEMOGRAPHICS";
    proc print data = demographics;
run;


SAS Connection established. Subprocess id is 2669



Obs,Gender,Age,Height,Weight
1,M,50,68,155
2,F,23,60,101
3,M,65,72,220
4,F,35,65,133
5,M,15,71,166


### Reading CSV file:

In [8]:
filename preston 'mydata.csv';
data demographic;
    infile preston dsd dlm = ',';
    input Gender $ Age Height Weight;
run;

title "Listing of data set DEMOGRAPHICS";
    proc print data = demographic;
run;

Obs,Gender,Age,Height,Weight
1,M,50,68,155
2,F,23,60,101
3,M,65,72,220
4,F,35,65,133
5,M,15,71,166


### Placing Data Lines Directly in Program

In [10]:
data demographic;
    input Gender $ Age Height Weight;

datalines;
M 50 68 155
F 23 60 101
M 65 72 220
F 35 65 133
M 15 71 166

title "Listing of data set DEMOGRAPHICS";
    proc print data = demographic;
run;

Obs,Gender,Age,Height,Weight
1,M,50,68,155
2,F,23,60,101
3,M,65,72,220
4,F,35,65,133
5,M,15,71,166


In [13]:
data demographics;
    infile datalines dsd;
    input Gender $ Age Height Weight;
datalines;
"M",50,68,155
"F",23,60,101
"M",65,72,220
"F",35,65,133
"M",15,71,156
;

title "Listing of data set DEMOGRAPHICS";
    proc print data = demographics;
run;

Obs,Gender,Age,Height,Weight
1,M,50,68,155
2,F,23,60,101
3,M,65,72,220
4,F,35,65,133
5,M,15,71,156


###  Reading Raw Data from Fixed Columns

#### Method 1: Column Input

In [14]:
data financial;
    infile 'bank.txt';
    input   Subj $ 1-3
            DOB  $ 4-13
            Gender $ 14
            Balance 15 -21;
    run;

title "Bank";
    proc print data = financial;
run;

Obs,Subj,DOB,Gender,Balance
1,1,10/21/1955,M,1145.0
2,2,11/18/2001,F,18722.0
3,3,05/07/1944,M,123.45
4,4,07/25/1945,F,-12345.0


### Formatted Input:

In [2]:
data financial;
    infile 'bank.txt';
    input   @1 Subj $3.
            @4 DOB mmddyy10.
            @14 Gender $1.
            @15 Balance 7.;
run;

title "Bank";
    proc print data = financial;
run;

Obs,Subj,DOB,Gender,Balance
1,1,-1533,M,1145.0
2,2,15297,F,18722.0
3,3,-5717,M,123.45
4,4,-5273,F,-12345.0


In [3]:
/* Demonstrating a FORMAT statement */
title "Listing of FINANCIAL";
    proc print data=financial;
        format  DOB     mmddyy10.
                Balance dollar11.2;
run;

Obs,Subj,DOB,Gender,Balance
1,1,10/21/1955,M,"$1,145.00"
2,2,11/18/2001,F,"$18,722.00"
3,3,05/07/1944,M,$123.45
4,4,07/25/1945,F,"$-12,345.00"


In [4]:
title "Listing of FINANCIAL";
    proc print data=financial;
            format DOB date9.
            Balance dollar11.2;
run;

Obs,Subj,DOB,Gender,Balance
1,1,21OCT1955,M,"$1,145.00"
2,2,18NOV2001,F,"$18,722.00"
3,3,07MAY1944,M,$123.45
4,4,25JUL1945,F,"$-12,345.00"


In [6]:
/* Using informats with list input */
data list_example;
    infile 'list.csv' dsd;
        input Subj : $3.
        Name : $20.
        DOB : mmddyy10.
        Salary : dollar8.;
    format DOB date9. Salary dollar8.;
run;

Title "Output";
    proc print data = list_example;
run

Obs,Subj,Name,DOB,Salary
1,1,Christopher Mullens,12NOV1955,"$45,200"
2,2,Michelle Kwo,12SEP1955,"$78,123"
3,3,Roger W. McDonald,01JAN1960,"$107,200"


In [8]:
/* Supplying an informat statement with list input */
data list_example;
    informat    Subj $3.
                Name $20.
                DOB mmddyy10.
                Salary dollar8.;
    infile 'list.csv' dsd;
    input   Subj
            Name
            DOB
            Salary;
    format DOB date9. Salary dollar8.;
run;

Title "List_Example";
    proc print data = list_example;
run;

Obs,Subj,Name,DOB,Salary
1,1,Christopher Mullens,12NOV1955,"$45,200"
2,2,Michelle Kwo,12SEP1955,"$78,123"
3,3,Roger W. McDonald,01JAN1960,"$107,200"


In [10]:
/*  Demonstrating the ampersand modifier for list input */
data list_example;
    infile 'list.txt';
        input Subj : $3.
        Name & $20.
        DOB : mmddyy10.
        Salary : dollar8.;
    format DOB date9. Salary dollar8.;
run;

Title 'Example';
    proc print data = list_example;
run;

Obs,Subj,Name,DOB,Salary
1,1,Christopher Mullens,12NOV1955,"$45,200"
2,2,Michelle Kwo,12SEP1955,"$78,123"
3,3,Roger W. McDonald,01JAN1960,"$107,200"


## Exercise 3.1

In [14]:
data prob1;
    infile 'scores.txt';
    input Gender $ English History Math Science;
    Average = sum(English, History, Math, Science)/4;
run;

Title 'Score Table';
    proc print data = prob1;
run;

Obs,Gender,English,History,Math,Science,Average
1,M,80,82,85,88,83.75
2,F,94,92,88,96,92.5
3,M,96,88,89,92,91.25
4,F,95,.,92,92,69.75


## Exercise 3.2

In [22]:
data Vote;
    infile 'political.csv' dsd;
    input State $ Political_Party $ Age;
    
run; 

Title 'Vote';
    proc print data = Vote;
run;

title "Frequency Distribution of Political Party";
proc freq data = Vote;
    tables Political_Party;
run;

Obs,State,Political_Party,Age
1,NJ,Ind,55
2,CO,Dem,45
3,NY,Rep,23
4,FL,Dem,66
5,NJ,Rep,34

Political_Party,Frequency,Percent,Cumulative Frequency,Cumulative Percent
Dem,2,40.0,2,40.0
Ind,1,20.0,3,60.0
Rep,2,40.0,5,100.0


## Exercise 3.3

In [28]:
data Company;
    infile 'company.txt' dsd dlm = "$";
    input LastName $ EmpNo $ Salary;
    
run;

Title 'Company Information';
    proc print data = Company;
run;
    

Obs,LastName,EmpNo,Salary
1,Roberts,M234,45000
2,Chien,M74777,.
3,Walters,,75000
4,Rogers,F7272,78131


## Exercise 3.5

In [59]:
data newZ;
    infile datalines dsd;
    input X Y;
    Z = 100 + 50 * X - 2 * X * X - 25 * Y  + Y ** 2;
datalines;
1,2
3,6
5,9
9,11
;

title "Listing of data set";
    proc print data = newZ;
run;

Obs,X,Y,Z
1,1,2,102
2,3,6,118
3,5,9,156
4,9,11,234


## Exercise 3.6

In [57]:
data Bank;
    infile 'bankdata.txt';
    input @1 Name $15.
            @16 Acct $5.
            @21 Balance 6.
            @27 Rate 4.;
    format Balance dollar10.;
run;

title "Bank";
    proc print data = Bank;
run;

Obs,Name,Acct,Balance,Rate
1,Philip Jones,V1234,$432,2.32
2,Nathan Philips,V1399,"$1,520",2.45
3,Shu Lu,W8892,"$45,123",3.45
4,Betty Boop,V7677,"$5,000",2.78


## Exercise 3.7

In [51]:
data Cache;
    infile 'geocaching.txt';
    input   @1 Name  $20.
            @21 LongDeg 2.
            @23 LongMin 6.
            @29 LagDeg  2.
            @31 LatMin 6.;
run;

Title 'Geo Caching';
    proc print data = Cache;
run;

Obs,Name,LongDeg,LongMin,LagDeg,LatMin
1,Higgensville Hike,40,30.293,74,46.539
2,Really Roaring,40,27.404,74,42.147
3,Cushetunk Climb,40,37.024,74,48.014
4,Uplands Trek,40,30.99,74,52.794


## Exercise 3.10

In [50]:
data Stocks;
    infile "stockprices.txt";
    input   @1 Stock $4.
            @5 PurDate mmddyy10.
            @15 PurPrice dollar6.
            @21 Number 4.
            @25 SellDate mmddyy10.
            @35 SellPrice dollar6.;
    TotalPur = PurPrice * Number;
    TotalSell = SellPrice * Number;
    Profit = TotalSell - TotalPur;
    format  PurDate mmddyy10. 
            PurPrice dollar8.1 
            SellDate mmddyy10. 
            SellPrice dollar8.2
            TotalPur dollar8.
            TotalSell dollar8.
            Profit dollar8.;
            
run;

Title 'Stock Table';
    proc print data = Stocks;
run;

Obs,Stock,PurDate,PurPrice,Number,SellDate,SellPrice,TotalPur,TotalSell,Profit
1,IBM,05/21/2006,$80.0,100,07/20/2006,$88.50,"$8,000","$8,850",$850
2,CSCO,04/05/2005,$17.5,200,09/21/2005,$23.60,"$3,500","$4,720","$1,220"
3,MOT,03/01/2004,$14.7,500,10/10/2006,$19.90,"$7,350","$9,950","$2,600"
4,XMSR,04/15/2006,$28.4,200,04/15/2006,$12.70,"$5,680","$2,540","$-3,140"
5,BBY,02/15/2005,$45.2,100,09/09/2006,$56.80,"$4,520","$5,680","$1,160"


## Exercise 3.11

In [35]:
data Employee;
    infile "employee.csv" dsd;
    informat ID $3. Name $20. Dept $20. DateHire mmddyy10. Salary dollar8.;
    input ID Name Dept DateHire Salary;
    format DateHire mmddyy10. Salary dollar11.;
run;

Title "Employee";
    proc print data = Employee;
run;

title "Frequency Distribution of Dept";
proc freq data = Employee;
    tables Dept;
run;

Obs,ID,Name,Dept,DateHire,Salary
1,123,Harold Wilson,Acct,01/15/1989,"$78,123"
2,128,Julia Child,Food,08/29/1988,"$89,123"
3,7,James Bond,Security,02/01/2000,"$82,100"
4,828,Roger Doger,Acct,08/15/1999,"$39,100"
5,900,Earl Davenport,Food,09/09/1989,"$45,399"
6,906,James Swindler,Acct,12/21/1978,"$78,200"

Dept,Frequency,Percent,Cumulative Frequency,Cumulative Percent
Acct,3,50.0,3,50.0
Food,2,33.33,5,83.33
Security,1,16.67,6,100.0
