Skip to content

Commit

Permalink
fix dodgy indents
Browse files Browse the repository at this point in the history
  • Loading branch information
samnlindsay committed Feb 17, 2022
1 parent 19715fd commit 2de14a6
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions splink/missingness.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,26 @@ def missingness_chart(df: DataFrame):
df (DataFrame): Input Spark dataframe
Returns:
Bar chart of missingness
"""

# Load JSON definition of missingness chart
chart_path = "missingness_chart_def.json"
missingness_chart_def = load_chart_definition(chart_path)



# Data for plot
# Count and percentage of nulls in each column as a pandas dataframe
df_nulls = df.select([count(when(col(c).isNull(), c)).alias(c) for c in df.columns])
pd_nulls = df_nulls.toPandas()
pd_nulls = pd.melt(pd_nulls)

record_count = df.count()
pd_nulls["percentage"] = pd_nulls['value']/record_count
pd_nulls["percentage"] = pd_nulls["value"] / record_count

# Add data to JSON chart definition
missingness_chart_def["data"]["values"] = pd_nulls.to_dict("records")

# Update chart title
for c in missingness_chart_def["layer"]:
c["title"] = f"Missingness per column out of {record_count:,.0f} records"

return altair_if_installed_else_json(missingness_chart_def)

0 comments on commit 2de14a6

Please sign in to comment.