Skip to content

Commit

Permalink
Update readstat
Browse files Browse the repository at this point in the history
Fixes #235. Fixes #245. Closes #255.
  • Loading branch information
hadley committed Jan 25, 2017
1 parent e52b0b4 commit 42c8883
Show file tree
Hide file tree
Showing 11 changed files with 356 additions and 101 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Expand Up @@ -8,6 +8,7 @@
* SAS: support Win baltic code page (#231)
* SPSS: support uppercase time stamps (#230)
* SPSS: fixes for 252-255 byte strings (#226)
* SPSS: fixes for 0 byte strings (#245)

# haven 1.0.0

Expand Down
13 changes: 12 additions & 1 deletion src/readstat/sas/readstat_sas7bdat_write.c
Expand Up @@ -144,7 +144,18 @@ static readstat_error_t sas7bdat_emit_header(readstat_writer_t *writer, sas_head
};

memcpy(&header_start.magic, sas7bdat_magic_number, sizeof(header_start.magic));
strncpy(header_start.file_label, writer->file_label, sizeof(header_start.file_label));

memset(header_start.file_label, ' ', sizeof(header_start.file_label));

size_t file_label_len = strlen(writer->file_label);
if (file_label_len > sizeof(header_start.file_label))
file_label_len = sizeof(header_start.file_label);

if (file_label_len) {
memcpy(header_start.file_label, writer->file_label, file_label_len);
} else {
memcpy(header_start.file_label, "DATASET", sizeof("DATASET")-1);
}

return sas_write_header(writer, hinfo, header_start);
}
Expand Down
81 changes: 81 additions & 0 deletions src/readstat/spss/readstat_sav_date.c
@@ -0,0 +1,81 @@
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns nonzero when `year` is a leap year under the Gregorian rules. */
static inline int is_leap(int year) {
    return ((year % 4 == 0 && year % 100 != 0) || year % 400 == 0);
}

/* Month lengths indexed by zero-based month, for common and leap years. */
static const int sav_days_per_month[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static const int sav_days_per_month_leap[] = {31,29,31,30,31,30,31,31,30,31,30,31};

/* Length of one day in seconds; leap seconds are ignored. */
static const double sav_seconds_per_day = 86400.0;
/* Seconds between 1582-01-01 and the SPSS epoch, 1582-10-14 (286 days). */
static const double sav_epoch_offset_seconds = 24710400.0;

/*
 * Parses a leading "YYYY-MM-DD" date from `s` and converts it to an SPSS
 * date: the number of seconds since the start of the Gregorian calendar
 * (midnight, Oct 14, 1582). Through the C interface in savReaderWriter it
 * was verified that leap seconds are ignored.
 *
 * s     NUL-terminated text to parse.
 * dest  Out-parameter, must not be NULL. On success it points just past the
 *       characters consumed from `s`; on failure it is set to `s`.
 *
 * Returns the number of seconds, or 0 on failure. Because 0 is also the
 * value of 1582-10-14, callers must test `*dest == s` to detect failure.
 *
 * Limitation: years before 1582 are not handled. The year loop does not run
 * for them, so only the offset within the year is counted.
 */
double readstat_sav_date_parse(const char *s, char **dest) {
    int year = 0;
    int month = 0;
    int day = 0;
    int consumed = 0;

    /* Assume failure until the whole date has been validated. */
    *dest = (char *)s;

    /* Check the conversion count before touching any parsed field; %n
     * records how many characters the three fields actually occupied. */
    if (sscanf(s, "%d-%d-%d%n", &year, &month, &day, &consumed) != 3) {
        return 0;
    }
    if (month < 1 || month > 12) {
        return 0;
    }

    const int *month_lengths = is_leap(year) ? sav_days_per_month_leap
                                             : sav_days_per_month;
    if (day < 1 || day > month_lengths[month - 1]) {
        return 0;
    }

    /* Count whole days elapsed since 1582-01-01. */
    int days = 0;
    for (int y = 1582; y < year; y++) {
        days += is_leap(y) ? 366 : 365;
    }
    for (int m = 0; m < month - 1; m++) {
        days += month_lengths[m];
    }
    days += day - 1;

    *dest = (char *)s + consumed;
    return days * sav_seconds_per_day - sav_epoch_offset_seconds;
}

/*
 * Formats an SPSS date (seconds since midnight, Oct 14, 1582) as
 * "YYYY-MM-DD" into `dest`, writing at most `size` bytes via snprintf.
 *
 * Returns `dest` on success. Returns NULL, after printing a diagnostic to
 * stderr, when `seconds` is not a whole number of days or when the date is
 * out of range (before 1582-01-01, NaN/infinite, or more days than an int
 * can hold).
 */
char* readstat_sav_date_string(double seconds, char* dest, int size) {
    const double days = (seconds + sav_epoch_offset_seconds) / sav_seconds_per_day;

    /* Written as a negated conjunction so that NaN is rejected too. Without
     * this bound, a negative count would silently print 1582-01-01 and a
     * huge count would never shrink when whole years are subtracted. */
    if (!(days >= 0.0 && days <= (double)INT_MAX)) {
        fprintf(stderr, "%s:%d date out of range. seconds was %lf\n", __FILE__, __LINE__, seconds);
        return NULL;
    }

    /* The range check above makes this conversion well defined. */
    int remaining = (int)days;

    /* Distance to the next whole day; nonzero means a time-of-day part. */
    double rounded_up = (double)remaining;
    if (rounded_up < days) {
        rounded_up += 1.0;
    }
    const double err = rounded_up - days;
    if (err != 0.0) {
        fprintf(stderr, "%s:%d time not supported. seconds was %lf, err was %lf\n", __FILE__, __LINE__, seconds, err);
        return NULL;
    }

    /* Peel off whole years, starting from 1582-01-01. */
    int yr = 1582;
    for (;;) {
        const int days_in_year = is_leap(yr) ? 366 : 365;
        if (remaining < days_in_year) {
            break;
        }
        remaining -= days_in_year;
        yr++;
    }

    /* Then whole months; `remaining` is now less than the year's length,
     * so `month` cannot run past December. */
    const int *month_lengths = is_leap(yr) ? sav_days_per_month_leap
                                           : sav_days_per_month;
    int month = 0;
    while (remaining >= month_lengths[month]) {
        remaining -= month_lengths[month];
        month++;
    }

    snprintf(dest, size, "%04d-%02d-%02d", yr, month + 1, remaining + 1);
    return dest;
}
7 changes: 7 additions & 0 deletions src/readstat/spss/readstat_sav_date.h
@@ -0,0 +1,7 @@
#ifndef __READSTAT_SAV_DATE_H
#define __READSTAT_SAV_DATE_H

/*
 * Parses a "YYYY-MM-DD" style date at the start of `s` and returns it as an
 * SPSS date: seconds since midnight, Oct 14, 1582.
 * On failure returns 0 and sets *dest to `s`; on success *dest is set to a
 * position after `s`. Since 0 is also a valid result, compare *dest with `s`
 * to detect failure.
 */
double readstat_sav_date_parse(const char *s, char **dest);
/*
 * Formats an SPSS date (seconds since midnight, Oct 14, 1582) as
 * "YYYY-MM-DD" into `dest`, writing at most `size` bytes, and returns `dest`.
 * Returns NULL when the value cannot be converted, e.g. when `seconds` is
 * not a whole number of days.
 */
char* readstat_sav_date_string(double seconds, char* dest, int size);

#endif
4 changes: 3 additions & 1 deletion src/readstat/spss/readstat_sav_write.c
Expand Up @@ -907,7 +907,9 @@ static size_t sav_variable_width(readstat_type_t type, size_t user_width) {
size_t last_segment_width = ((user_width - (n_segments - 1) * 252) + 7)/8*8;
return (n_segments-1)*256 + last_segment_width;
}

if (user_width == 0) {
return 8;
}
return (user_width + 7) / 8 * 8;
}
return 8;
Expand Down
14 changes: 11 additions & 3 deletions src/readstat/stata/readstat_dta.h
Expand Up @@ -11,11 +11,19 @@ typedef struct dta_header_s {
int32_t nobs;
} dta_header_t;

typedef struct dta_strl_header_s {
unsigned char vo_bytes[8];
typedef struct dta_117_strl_header_s {
uint32_t v;
uint32_t o;
unsigned char type;
int32_t len;
} dta_strl_header_t;
} dta_117_strl_header_t;

/*
 * strL header variant named for format 118. It has the same fields as
 * dta_117_strl_header_t except that `o` is 64 bits wide instead of 32.
 * NOTE(review): `v` and `o` presumably hold the variable and observation
 * indices of the string — confirm against the Stata dta specification.
 */
typedef struct dta_118_strl_header_s {
uint32_t v;
uint64_t o;
unsigned char type;
int32_t len; /* presumably the byte length of the payload that follows — TODO confirm */
} dta_118_strl_header_t;

#pragma pack(pop)

Expand Down
96 changes: 96 additions & 0 deletions src/readstat/stata/readstat_dta_days.c
@@ -0,0 +1,96 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns nonzero when `year` is a leap year under the Gregorian rules. */
static inline int is_leap(int year) {
    return ((year % 4 == 0 && year % 100 != 0) || year % 400 == 0);
}

/* Month lengths indexed by zero-based month, for common and leap years. */
static const int dta_days_per_month[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static const int dta_days_per_month_leap[] = {31,29,31,30,31,30,31,31,30,31,30,31};

/*
 * Parses a leading "YYYY-MM-DD" date from `s` and converts it to a day
 * count relative to 1960-01-01 (negative for earlier dates).
 *
 * s     NUL-terminated text to parse.
 * dest  Out-parameter, must not be NULL. On success it points just past the
 *       characters consumed from `s`; on failure it is set to `s`.
 *
 * Returns the day count, or 0 on failure. Because 0 is also the value of
 * 1960-01-01, callers must test `*dest == s` to detect failure.
 */
int readstat_dta_num_days(const char *s, char **dest) {
    int year = 0;
    int month = 0;
    int day = 0;
    int consumed = 0;

    /* Assume failure until the whole date has been validated. */
    *dest = (char *)s;

    /* Check the conversion count before touching any parsed field; %n
     * records how many characters the three fields actually occupied. */
    if (sscanf(s, "%d-%d-%d%n", &year, &month, &day, &consumed) != 3) {
        return 0;
    }
    if (month < 1 || month > 12) {
        return 0;
    }

    const int *month_lengths = is_leap(year) ? dta_days_per_month_leap
                                             : dta_days_per_month;
    if (day < 1 || day > month_lengths[month - 1]) {
        return 0;
    }

    /* Offset of January 1st of `year` from 1960-01-01. At most one of the
     * two loops runs, depending on which side of 1960 the year falls. */
    int days = 0;
    for (int y = year; y < 1960; y++) {
        days -= is_leap(y) ? 366 : 365;
    }
    for (int y = 1960; y < year; y++) {
        days += is_leap(y) ? 366 : 365;
    }

    /* Offset of the date within its own year. */
    for (int m = 0; m < month - 1; m++) {
        days += month_lengths[m];
    }
    days += day - 1;

    *dest = (char *)s + consumed;
    return days;
}

/*
 * Formats a day count relative to 1960-01-01 as "YYYY-MM-DD" into `dest`,
 * writing at most `size` bytes via snprintf, and returns `dest`.
 * Negative counts give dates before 1960.
 */
char* readstat_dta_days_string(int days, char* dest, int size) {
    int yr;
    int month;
    int day;

    if (days < 0) {
        /* Walk backwards from 1959-12-31. Widen before negating so that
         * INT_MIN does not overflow. */
        long long remaining = -(long long)days;

        yr = 1959;
        for (;;) {
            const int days_in_year = is_leap(yr) ? 366 : 365;
            if (remaining <= days_in_year) {
                break;
            }
            remaining -= days_in_year;
            yr--;
        }

        /* `remaining` is now between 1 and the year's length, so `month`
         * cannot run below January. */
        const int *month_lengths = is_leap(yr) ? dta_days_per_month_leap
                                               : dta_days_per_month;
        month = 11;
        while (remaining > month_lengths[month]) {
            remaining -= month_lengths[month];
            month--;
        }
        day = month_lengths[month] - (int)remaining + 1;
    } else {
        /* Walk forwards from 1960-01-01. */
        int remaining = days;

        yr = 1960;
        for (;;) {
            const int days_in_year = is_leap(yr) ? 366 : 365;
            if (remaining < days_in_year) {
                break;
            }
            remaining -= days_in_year;
            yr++;
        }

        /* `remaining` is now less than the year's length, so `month`
         * cannot run past December. */
        const int *month_lengths = is_leap(yr) ? dta_days_per_month_leap
                                               : dta_days_per_month;
        month = 0;
        while (remaining >= month_lengths[month]) {
            remaining -= month_lengths[month];
            month++;
        }
        day = remaining + 1;
    }

    snprintf(dest, size, "%04d-%02d-%02d", yr, month + 1, day);
    return dest;
}
7 changes: 7 additions & 0 deletions src/readstat/stata/readstat_dta_days.h
@@ -0,0 +1,7 @@
#ifndef __READSTAT_DTA_DAYS_H
#define __READSTAT_DTA_DAYS_H

/*
 * Parses a "YYYY-MM-DD" style date at the start of `s` and returns the
 * number of days between it and 1960-01-01 (negative for earlier dates).
 * On failure returns 0 and sets *dest to `s`; on success *dest is set to a
 * position after `s`. Since 0 is also a valid result, compare *dest with `s`
 * to detect failure.
 */
int readstat_dta_num_days(const char *s, char** dest);
/*
 * Formats a day count relative to 1960-01-01 as "YYYY-MM-DD" into `dest`,
 * writing at most `size` bytes, and returns `dest`.
 */
char* readstat_dta_days_string(int days, char* dest, int size);

#endif

0 comments on commit 42c8883

Please sign in to comment.