In [1]:
%use dataframe(1.0.0-Beta3)

In [2]:
import ds.utils.db.DB
import ds.utils.db.selectDf
import ds.utils.db.selectTableAsDf

// Read the `airlines.boarding_data` table through the project's DB helper.
// The passenger columns used to build the user table are taken from this frame.
val data = DB.selectTableAsDf("airlines.boarding_data")

In [3]:
// Passenger identity columns only, with duplicate rows removed.
val userData = data
    .select {
        passenger_first_name and
            passenger_second_name and
            passenger_last_name and
            passenger_sex and
            passenger_document and
            passenger_birth_date
    }
    .distinct()

In [4]:
import java.util.Date

/**
 * Per-passenger attributes that are merged pairwise by `reduceUserDto` / `reduceUserDto0`.
 *
 * Rows of a grouped frame are converted into this class with `convertTo<UserDto>`.
 * NOTE(review): the snake_case property names presumably have to match the source
 * column names for that conversion to bind — confirm before renaming.
 *
 * @property passenger_second_name second name; may be an initial such as `A.`
 * @property passenger_sex sex value as stored in the source data
 * @property passenger_document document identifier as stored in the source data
 * @property passenger_birth_date birth date
 */
data class UserDto(
    val passenger_second_name: String,
    val passenger_sex: String,
    val passenger_document: String,
    val passenger_birth_date: Date,
)

In [5]:

/**
 * Asserts that the receiver equals [second] and returns [second].
 *
 * Intended for inline use while building a merged value, e.g.
 * `a.field shouldBeEqueal b.field`.
 *
 * @param second the value the receiver is compared against with `==`
 * @return [second] when both values are equal
 * @throws IllegalArgumentException when the values differ; the message contains both values
 */
infix fun <T> T.shouldBeEqueal(second: T): T {
    // `require` throws IllegalArgumentException, the same type callers already handle.
    require(this == second) { "Unexpected mismatch: <$this> != <$second>" }
    return second
}

// Matches a bare initial: exactly one uppercase ASCII letter followed by a dot (e.g. "A.").
val shortNameRegex = Regex("[A-Z]\\.")

/**
 * Merges two records of the same passenger into one.
 *
 * Sex, document and birth date must be equal on both records (checked with
 * `shouldBeEqueal`, in that order). The second name is taken from [first] unless
 * [first] holds only an initial matching `shortNameRegex`, in which case the
 * value from [second] is used.
 *
 * @param first the record accumulated so far
 * @param second the next record to merge in
 * @return the merged record
 * @throws IllegalArgumentException when sex, document or birth date differ
 */
fun reduceUserDto0(first: UserDto, second: UserDto): UserDto {
    // Pure string check, so computing it before the equality checks changes nothing observable.
    val firstHoldsInitialOnly = shortNameRegex.matches(first.passenger_second_name)
    val mergedSecondName =
        if (firstHoldsInitialOnly) second.passenger_second_name else first.passenger_second_name

    return UserDto(
        passenger_second_name = mergedSecondName,
        passenger_sex = first.passenger_sex shouldBeEqueal second.passenger_sex,
        passenger_document = first.passenger_document shouldBeEqueal second.passenger_document,
        passenger_birth_date = first.passenger_birth_date shouldBeEqueal second.passenger_birth_date,
    )
}

/**
 * Lenient wrapper around [reduceUserDto0] for use as a `reduce` operation.
 *
 * When the two records disagree on a field that must be equal, the pair is
 * printed and [first] is kept unchanged, so one inconsistent record does not
 * abort the whole reduction.
 *
 * Only [IllegalArgumentException] (the mismatch signal) is handled here; any
 * other failure propagates instead of being reported as a mismatch.
 *
 * @param first the record accumulated so far
 * @param second the next record to merge in
 * @return the merged record, or [first] when the records do not match
 */
fun reduceUserDto(first: UserDto, second: UserDto): UserDto =
    try {
        reduceUserDto0(first, second)
    } catch (mismatch: IllegalArgumentException) {
        println("Missmatch: ${first}, ${second}")
        first
    }

In [6]:
import ds.types.custom.Document
import javax.print.Doc

// One row per (document, first name, last name). The rows of each group are
// collapsed into a single UserDto with reduceUserDto and stored in the `reduced` column.
val userDataReducedSecondName = userData
    .groupBy { passenger_document and passenger_first_name and passenger_last_name }
    .toDataFrame()
    .mapToFrame {
        "document" from { Document.from(passenger_document) }
        "first_name" from { passenger_first_name }
        "last_name" from { passenger_last_name }
        "reduced" from {
            // The nested frame in the `group` column holds every original row of this key.
            val groupRecords = "group"<DataFrame<*>>()
                .convertTo<UserDto> { }
                .toList<UserDto>()
            groupRecords.reduce(::reduceUserDto)
        }
    }


In [7]:
import java.util.UUID

// Flatten the `reduced` object into plain columns and give every row a freshly generated UUID.
// NOTE(review): ids are random on every evaluation, so re-running this cell and the insert
// below would presumably store the same passengers again under new ids — confirm the target
// table is empty (or truncated) before re-running.
val users = userDataReducedSecondName.mapToFrame {
    "document" from { document.toNormalizedString() }
    "first_name" from { first_name }
    "last_name" from { last_name }
    "second_name" from { reduced.passenger_second_name }
    "sex" from { reduced.passenger_sex }
    "birth_date" from { reduced.passenger_birth_date }
    "id" from { UUID.randomUUID() }
}

In [8]:
import ds.utils.db.insertTo

// Write the user frame to `airlines.boarding_data_users` through the project's DB helper.
users.insertTo("airlines.boarding_data_users")


In [9]:
import ds.utils.db.execute

// Back-fill `boarding_data.user_id`: each boarding row receives the id of the
// `boarding_data_users` row with the same document, first name and last name.
// NOTE(review): `boarding_data_users.document` was written from
// `document.toNormalizedString()`, while this join compares it with the raw
// `passenger_document`. If normalization ever changes the text, those rows will
// not match and keep their previous `user_id` — confirm against the data.
DB.execute("""
UPDATE airlines.boarding_data
SET user_id = t.uid
FROM (
    SELECT bd.id as rid, bu.id as uid
        FROM airlines.boarding_data bd
    JOIN airlines.boarding_data_users bu ON
        bd.passenger_document = bu.document
        AND bd.passenger_first_name = bu.first_name
        AND bd.passenger_last_name = bu.last_name
 ) t
WHERE id = t.rid;
""")

155147