In [1]:
import pandas as pd
import geopandas as gp
import numpy as np

### Datenimport

Daten von Nikolai:

In [2]:
# Load the county-level results and keep only the columns used further down.
result_columns = ["fips", "winner", "shifted", "population", "eevp", "flipped"]
data = pd.read_csv("data/us-counties-2024.csv")[result_columns]
data.head(2)

Unnamed: 0,fips,winner,shifted,population,eevp,flipped
0,1001,Trump,1.023669,60342,99.0,
1,1003,Trump,2.249105,253507,99.0,


Geodaten von https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html

In [3]:
# Load the county shapes, derive a numeric `fips` key from GEOID so it can be
# joined against the results table, and give the kept columns readable names.
geometry = gp.read_file("data/geom.geojson")
geometry = (
    geometry
    .assign(fips=pd.to_numeric(geometry.GEOID))
    [["fips", "NAME", "STUSPS", "geometry"]]
    .rename(columns={"NAME": "county", "STUSPS": "state"})
)
geometry.head(2)

Unnamed: 0,fips,county,state,geometry
0,13027,Brooks,GA,"MULTIPOLYGON (((-83.73616 31.03768, -83.57396 ..."
1,31095,Jefferson,NE,"MULTIPOLYGON (((-97.36869 40.35039, -96.91606 ..."


### Fehlende Counties
Differenz zwischen den beiden Datensätzen:

In [4]:
# Outer join of shapes and results on the shared `fips` key; the `_merge`
# indicator column records which input(s) each row came from.
d1 = geometry.merge(data, on="fips", how="outer", indicator=True)
d1.head(2)

Unnamed: 0,fips,county,state,geometry,winner,shifted,population,eevp,flipped,_merge
0,13027,Brooks,GA,"MULTIPOLYGON (((-83.73616 31.03768, -83.57396 ...",Trump,3.233335,16245.0,99.0,,both
1,31095,Jefferson,NE,"MULTIPOLYGON (((-97.36869 40.35039, -96.91606 ...",Trump,1.497509,7054.0,99.0,,both


In [5]:
# Tag rows that exist in only one of the two inputs with the name of that
# input, then write those unmatched rows to a CSV for manual inspection.
for merge_flag, source_name in (('left_only', 'Geodaten'), ('right_only', 'Nikolai')):
    d1.loc[d1._merge == merge_flag, 'datensatz'] = source_name

unmatched = d1.loc[d1.datensatz.notna(), ['fips', 'county', 'state', 'datensatz']]
unmatched.to_csv('temp/diff.csv', index=False)

### Puerto Rico kann weg

"Die haben eh keine Rechte" – darum zeigen wir Puerto Rico nicht. Die FIPS-Codes für Puerto Rico liegen zwischen 72000 und 72153. Siehe: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/national/home/?cid=nrcs143_013697

In [6]:
# Drop every row whose FIPS code lies in the Puerto Rico range 72000-72153
# (bounds inclusive); work on a copy so later column assignments do not touch d1.
outside_puerto_rico = (d1.fips < 72000) | (d1.fips > 72153)
d2 = d1.loc[outside_puerto_rico].copy()

## EEVP

In [7]:
# Classify each county by its `eevp` value (presumably the share of the
# expected vote already counted -- confirm with the data provider).
# Below 50 the result is treated as 'uncertain', from 50 upwards as 'certain'.
# The upper branch uses >= so that a county sitting exactly on 50 is
# classified; with a strict > it would silently be left without a status.
# Rows whose eevp is missing match neither condition and keep a NaN status.
d2.loc[d2.eevp < 50, 'status'] = 'uncertain'
d2.loc[d2.eevp >= 50, 'status'] = 'certain'
d2.head(2)

Unnamed: 0,fips,county,state,geometry,winner,shifted,population,eevp,flipped,_merge,datensatz,status
0,13027,Brooks,GA,"MULTIPOLYGON (((-83.73616 31.03768, -83.57396 ...",Trump,3.233335,16245.0,99.0,,both,,certain
1,31095,Jefferson,NE,"MULTIPOLYGON (((-97.36869 40.35039, -96.91606 ...",Trump,1.497509,7054.0,99.0,,both,,certain


### Alaska
Alle ausser Denali ausblenden

In [8]:
# Everything is shown by default; within Alaska only Denali stays visible.
d2['visibility'] = "visible"
alaska_except_denali = d2.state.eq("AK") & d2.county.ne('Denali')
d2.loc[alaska_except_denali, 'visibility'] = "hidden"

### Validity checks
Anteile sollten nicht grösser als 100 Prozent sein

In [9]:
# Sanity checks before exporting: only known winners (or missing), and the
# shift values must lie strictly inside the open interval (-100, 100).
allowed_winners = ['Harris', 'Trump', np.nan]
assert d2.winner.isin(allowed_winners).all()

shift_low, shift_high = d2.shifted.min(), d2.shifted.max()
assert shift_low > -100
assert shift_high < 100

### TopoJSON erzeugen

Wird im nächsten Schritt mit der Vega-Spec kombiniert um die Plots zu erzeugen.
Das geht nicht direkt mit Geopandas.

Braucht die geo2topo-Binary. Die kriegt man, indem man das Topojson-Package installiert: https://github.com/topojson/topojson

In [10]:
# Export only the rows that have a geometry to GeoJSON.
has_shape = d2.geometry.notna()
d2[has_shape].to_file("temp/windofchange.geojson", driver='GeoJSON')

In [11]:
# Shell step: run the external geo2topo CLI on the GeoJSON written above and
# redirect its output into the TopoJSON file. Needs geo2topo available on PATH.
!geo2topo temp/windofchange.geojson>temp/windofchange.topo.json

### Dataframe für State-Detailansicht speichern

In [12]:
# Persist the complete frame (not filtered on geometry, unlike the GeoJSON
# export) so the state detail view can load it later.
d2.to_pickle('temp/windofchange.pkl')

In [13]:
# Largest value in the `shifted` column, shown for a quick look at the range.
d2.shifted.max()

14.150467817214532

In [14]:
# Smallest value in the `shifted` column, shown for a quick look at the range.
d2.shifted.min()

-6.379839821255146