# Create a file with one line per Texas county
#### County names and county seats are scraped from Wikipedia's [List of counties in Texas](https://en.wikipedia.org/wiki/List_of_counties_in_Texas)

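* Output: `texas_counties.csv` (columns `county` and `seat`, both upper-cased), uploaded to the data.world project set in the configuration cell below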

Author: Everett Wetchler (everett.wetchler@gmail.com)

## Configuration

In [1]:
# data.world dataset key; passed as the first argument to dw.open_remote_file
# in the "Write" section.
DTW_PROJECT_KEY = 'tji/auxiliary-datasets'
# Name of the CSV file written into that data.world project.
FILENAME = 'texas_counties.csv'

## Run

In [2]:
import datadotworld as dw
import pandas as pd
import os

%load_ext watermark
%watermark -a "Everett Wetchler" -d -t -z -p pandas,datadotworld

Everett Wetchler 2018-04-28 13:20:53 CDT

pandas 0.20.1
datadotworld 1.6.0


In [3]:
# Scrape the Texas county table from Wikipedia and reduce it to two
# upper-cased columns: `county` (without the " County" suffix) and `seat`.
COUNTY_SUFFIX = ' County'

# `match` selects the table(s) containing a known county name; `header=0`
# uses the table's first row as column labels. The first match is used.
counties_raw = pd.read_html('https://en.wikipedia.org/wiki/List_of_counties_in_Texas', match='Aransas County', header=0)[0]
print(len(counties_raw), 'counties found')
if len(counties_raw) != 254:
    raise Exception(f"Expected to find 254 counties. Found {len(counties_raw)}.")

# Columns 0 and 2 are renamed to county / seat below. This is positional, so
# it assumes Wikipedia keeps the current column order -- TODO confirm if the
# page layout changes.
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of `counties_raw` (avoids SettingWithCopyWarning).
df = counties_raw.iloc[:, [0, 2]].copy()
df.columns = ['county', 'seat']

# Only strip the suffix after verifying it is really there. Slicing a fixed
# number of characters off every name would silently corrupt any entry that
# does not end in " County" (e.g. one carrying a footnote marker).
has_suffix = df['county'].str.endswith(COUNTY_SUFFIX, na=False)
if not has_suffix.all():
    unexpected = df.loc[~has_suffix, 'county'].tolist()
    raise Exception(f"Expected every county name to end with '{COUNTY_SUFFIX}'. Found: {unexpected}")
df['county'] = df['county'].str.slice(stop=-len(COUNTY_SUFFIX))

# Normalize both columns to upper case.
for c in df.columns:
    df[c] = df[c].str.upper()
df.head()

254 counties found


Unnamed: 0,county,seat
0,ANDERSON,PALESTINE
1,ANDREWS,ANDREWS
2,ANGELINA,LUFKIN
3,ARANSAS,ROCKPORT
4,ARCHER,ARCHER CITY


## Write

In [4]:
# Upload the cleaned county table as a CSV file in the configured
# data.world project. The remote file handle is closed when the
# `with` block exits.
print("Writing to data.world")
with dw.open_remote_file(DTW_PROJECT_KEY, FILENAME) as remote_csv:
    # index=False: leave the DataFrame's integer row index out of the file.
    df.to_csv(remote_csv, index=False)

Writing to data.world


In [5]:
# Print a completion marker as the notebook's last step.
print("done")

done
