In [1]:
def cubed(s):
    return s*s*s

In [6]:
from pyspark.sql.types import LongType
spark.udf.register("cubed",cubed, LongType())

<function __main__.cubed(s)>

In [7]:
spark.range(1,9).createOrReplaceTempView("udf_test")

In [9]:
spark.sql("SELECT id, cubed(id) AS cubed_id FROM udf_test").show()

+---+--------+
| id|cubed_id|
+---+--------+
|  1|       1|
|  2|       8|
|  3|      27|
|  4|      64|
|  5|     125|
|  6|     216|
|  7|     343|
|  8|     512|
+---+--------+



In [11]:
import pandas as pd
from pyspark.sql.functions import col, pandas_udf
from pyspark.sql.types import LongType

def cubed(a: pd.Series)->pd.Series:
    return a*a*a

In [12]:
cubed_udf = pandas_udf(cubed, returnType=LongType())

ImportError: PyArrow >= 0.15.1 must be installed; however, it was not found.

In [21]:
from pyspark.sql.types import *
schema=StructType([StructField("celsius",ArrayType(IntegerType()))])

In [22]:
t_list = [[35,36,32,30,40,42,38]],[[31,32,34,55,56]]
t_c=spark.createDataFrame(t_list, schema)
t_c.createOrReplaceTempView("tC")

In [23]:
t_c.show(truncate=False)

+----------------------------+
|celsius                     |
+----------------------------+
|[35, 36, 32, 30, 40, 42, 38]|
|[31, 32, 34, 55, 56]        |
+----------------------------+



In [24]:
spark.sql("""SELECT celsius, transform(celsius, t-> ((t* 9) div 5) +32) as fahrenheit FROM tC""").show(truncate=False)

+----------------------------+-------------------------------+
|celsius                     |fahrenheit                     |
+----------------------------+-------------------------------+
|[35, 36, 32, 30, 40, 42, 38]|[95, 96, 89, 86, 104, 107, 100]|
|[31, 32, 34, 55, 56]        |[87, 89, 93, 131, 132]         |
+----------------------------+-------------------------------+



In [25]:
spark.sql("""SELECT celsius, filter(celsius, t -> t > 38) as high FROM tC""").show(truncate=False)

+----------------------------+--------+
|celsius                     |high    |
+----------------------------+--------+
|[35, 36, 32, 30, 40, 42, 38]|[40, 42]|
|[31, 32, 34, 55, 56]        |[55, 56]|
+----------------------------+--------+



In [26]:
spark.sql("""SELECT celsius, exists(celsius, t -> t = 38) as threshold FROM tC""").show(truncate=False)

+----------------------------+---------+
|celsius                     |threshold|
+----------------------------+---------+
|[35, 36, 32, 30, 40, 42, 38]|true     |
|[31, 32, 34, 55, 56]        |false    |
+----------------------------+---------+



In [27]:
# La función reduce no exite en Jupyter Notebook

#spark.sql("""
#SELECT celsius, 
# reduce(
# celsius, 
# 0, 
# (t, acc) -> t + acc, 
# acc -> (acc div size(celsius) * 9 div 5) + 32
# ) as avgFahrenheit 
# FROM tC
#""").show()

#Este sería el equivalente usando transform
spark.sql("""
SELECT celsius,
       transform (
           celsius,
           t -> ((t * 9) div 5) + 32
           ) as fahrenheit_temp       
FROM tC
""").show(10,False)


+----------------------------+-------------------------------+
|celsius                     |fahrenheit_temp                |
+----------------------------+-------------------------------+
|[35, 36, 32, 30, 40, 42, 38]|[95, 96, 89, 86, 104, 107, 100]|
|[31, 32, 34, 55, 56]        |[87, 89, 93, 131, 132]         |
+----------------------------+-------------------------------+



In [31]:
from pyspark.sql.functions import *
delaysPath = "C:/Users/alvaro.romero/Big_Data/LearningSparkV2-master/databricks-datasets/learning-spark-v2/flights/departuredelays.csv"
airportsPath = "C:/Users/alvaro.romero/Big_Data/LearningSparkV2-master/databricks-datasets/learning-spark-v2/flights/airport-codes-na.txt"

In [32]:
airports = (spark.read
           .option("header","true")
           .option("inferSchema","true")
           .option("delimiter", "\t")
           .csv(airportsPath))

airports.createOrReplaceTempView("airports_na")

In [33]:
delays = (spark.read
           .option("header","true")
           .csv(delaysPath)
           .withColumn("delay", expr("CAST(delay as INT) as delay"))
           .withColumn("distance", expr("CAST(distance as INT) as distance")))

delays.createOrReplaceTempView("departureDelays")

In [34]:
foo = delays.filter(expr("""origin == 'SEA' AND destination == 'SFO' AND date like '01010%' AND delay > 0"""))

foo.createOrReplaceTempView("foo")

In [8]:
spark.sql("SELECT * FROM airports_na LIMIT 10").show()

+-----------+-----+-------+----+
|       City|State|Country|IATA|
+-----------+-----+-------+----+
| Abbotsford|   BC| Canada| YXX|
|   Aberdeen|   SD|    USA| ABR|
|    Abilene|   TX|    USA| ABI|
|      Akron|   OH|    USA| CAK|
|    Alamosa|   CO|    USA| ALS|
|     Albany|   GA|    USA| ABY|
|     Albany|   NY|    USA| ALB|
|Albuquerque|   NM|    USA| ABQ|
| Alexandria|   LA|    USA| AEX|
|  Allentown|   PA|    USA| ABE|
+-----------+-----+-------+----+



In [9]:
spark.sql("SELECT * FROM departureDelays LIMIT 10").show()

+--------+-----+--------+------+-----------+
|    date|delay|distance|origin|destination|
+--------+-----+--------+------+-----------+
|01011245|    6|     602|   ABE|        ATL|
|01020600|   -8|     369|   ABE|        DTW|
|01021245|   -2|     602|   ABE|        ATL|
|01020605|   -4|     602|   ABE|        ATL|
|01031245|   -4|     602|   ABE|        ATL|
|01030605|    0|     602|   ABE|        ATL|
|01041243|   10|     602|   ABE|        ATL|
|01040605|   28|     602|   ABE|        ATL|
|01051245|   88|     602|   ABE|        ATL|
|01050605|    9|     602|   ABE|        ATL|
+--------+-----+--------+------+-----------+



In [10]:
spark.sql("SELECT * FROM foo").show()

+--------+-----+--------+------+-----------+
|    date|delay|distance|origin|destination|
+--------+-----+--------+------+-----------+
|01010710|   31|     590|   SEA|        SFO|
|01010955|  104|     590|   SEA|        SFO|
|01010730|    5|     590|   SEA|        SFO|
+--------+-----+--------+------+-----------+



In [11]:
bar = delays.union(foo)
bar.createOrReplaceTempView("bar")
bar.filter(expr("""origin == 'SEA' AND destination == 'SFO' AND date LIKE '01010%' AND delay > 0""")).show()

+--------+-----+--------+------+-----------+
|    date|delay|distance|origin|destination|
+--------+-----+--------+------+-----------+
|01010710|   31|     590|   SEA|        SFO|
|01010955|  104|     590|   SEA|        SFO|
|01010730|    5|     590|   SEA|        SFO|
|01010710|   31|     590|   SEA|        SFO|
|01010955|  104|     590|   SEA|        SFO|
|01010730|    5|     590|   SEA|        SFO|
+--------+-----+--------+------+-----------+



In [12]:
spark.sql("""
SELECT * 
 FROM bar 
 WHERE origin = 'SEA' 
 AND destination = 'SFO' 
 AND date LIKE '01010%' 
 AND delay > 0
""").show()

+--------+-----+--------+------+-----------+
|    date|delay|distance|origin|destination|
+--------+-----+--------+------+-----------+
|01010710|   31|     590|   SEA|        SFO|
|01010955|  104|     590|   SEA|        SFO|
|01010730|    5|     590|   SEA|        SFO|
|01010710|   31|     590|   SEA|        SFO|
|01010955|  104|     590|   SEA|        SFO|
|01010730|    5|     590|   SEA|        SFO|
+--------+-----+--------+------+-----------+



In [25]:
foo.join(airports, airports.IATA == foo.origin).select("City", "State", "date", "delay", "distance", "destination").show()

+-------+-----+--------+-----+--------+-----------+
|   City|State|    date|delay|distance|destination|
+-------+-----+--------+-----+--------+-----------+
|Seattle|   WA|01010710|   31|     590|        SFO|
|Seattle|   WA|01010955|  104|     590|        SFO|
|Seattle|   WA|01010730|    5|     590|        SFO|
+-------+-----+--------+-----+--------+-----------+



In [26]:
spark.sql("""
SELECT a.City, a.State, f.date, f.delay, f.distance, f.destination 
 FROM foo f
 JOIN airports_na a
 ON a.IATA = f.origin
""").show()

+-------+-----+--------+-----+--------+-----------+
|   City|State|    date|delay|distance|destination|
+-------+-----+--------+-----+--------+-----------+
|Seattle|   WA|01010710|   31|     590|        SFO|
|Seattle|   WA|01010955|  104|     590|        SFO|
|Seattle|   WA|01010730|    5|     590|        SFO|
+-------+-----+--------+-----+--------+-----------+



In [29]:
spark.stop()

In [30]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
spark = SparkSession.builder.appName("SparkSQLExampleApp").enableHiveSupport().getOrCreate()

In [35]:
spark.sql("DROP TABLE IF EXISTS departureDelaysWindow")

AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;

In [36]:
spark.sql("""CREATE TABLE departureDelaysWindow AS
SELECT origin, destination, SUM(delay) AS TotalDelays
 FROM departureDelays
WHERE origin IN ('SEA', 'SFO', 'JFK')
 AND destination IN ('SEA', 'SFO', 'JFK', 'DEN', 'ORD', 'LAX', 'ATL')
GROUP BY origin, destination""")

AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;

In [38]:
foo.show()

+--------+-----+--------+------+-----------+
|    date|delay|distance|origin|destination|
+--------+-----+--------+------+-----------+
|01010710|   31|     590|   SEA|        SFO|
|01010955|  104|     590|   SEA|        SFO|
|01010730|    5|     590|   SEA|        SFO|
+--------+-----+--------+------+-----------+



In [42]:
from pyspark.sql.functions import expr
foo2 = foo.withColumn("status", expr("CASE WHEN delay <= 10 THEN 'On-time' ELSE 'Delayed' END"))
foo2.show()

+--------+-----+--------+------+-----------+-------+
|    date|delay|distance|origin|destination| status|
+--------+-----+--------+------+-----------+-------+
|01010710|   31|     590|   SEA|        SFO|Delayed|
|01010955|  104|     590|   SEA|        SFO|Delayed|
|01010730|    5|     590|   SEA|        SFO|On-time|
+--------+-----+--------+------+-----------+-------+



In [44]:
foo3 = foo2.drop("delay")
foo3.show()

+--------+--------+------+-----------+-------+
|    date|distance|origin|destination| status|
+--------+--------+------+-----------+-------+
|01010710|     590|   SEA|        SFO|Delayed|
|01010955|     590|   SEA|        SFO|Delayed|
|01010730|     590|   SEA|        SFO|On-time|
+--------+--------+------+-----------+-------+



In [45]:
foo4 = foo3.withColumnRenamed("status", "flight_status")
foo4.show()

+--------+--------+------+-----------+-------------+
|    date|distance|origin|destination|flight_status|
+--------+--------+------+-----------+-------------+
|01010710|     590|   SEA|        SFO|      Delayed|
|01010955|     590|   SEA|        SFO|      Delayed|
|01010730|     590|   SEA|        SFO|      On-time|
+--------+--------+------+-----------+-------------+



In [46]:
spark.sql("""SELECT destination, cast(substring(date, 0, 2) as int) as month, delay
            FROM departureDelays
            WHERE origin = 'SEA'""").show()

+-----------+-----+-----+
|destination|month|delay|
+-----------+-----+-----+
|        ORD|    1|   92|
|        JFK|    1|   -7|
|        DFW|    1|   -5|
|        MIA|    1|   -3|
|        DFW|    1|   -3|
|        DFW|    1|    1|
|        ORD|    1|  -10|
|        DFW|    1|   -6|
|        DFW|    1|   -2|
|        ORD|    1|   -3|
|        ORD|    1|    0|
|        DFW|    1|   23|
|        DFW|    1|   36|
|        ORD|    1|  298|
|        JFK|    1|    4|
|        DFW|    1|    0|
|        MIA|    1|    2|
|        DFW|    1|    0|
|        DFW|    1|    0|
|        ORD|    1|   83|
+-----------+-----+-----+
only showing top 20 rows



In [47]:
spark.sql("""SELECT * FROM(
                SELECT destination, cast(substring(date,0,2)as int) as month, delay
                FROM departureDelays
                Where origin = 'SEA')
                PIVOT(
                    cast(avg(delay) as decimal(4,2)) as AvgDelay, max(delay) as MaxDelay FOR month in (1 Jan, 2 Feb)
                    )
                ORDER BY destination""").show()

+-----------+------------+------------+------------+------------+
|destination|Jan_AvgDelay|Jan_MaxDelay|Feb_AvgDelay|Feb_MaxDelay|
+-----------+------------+------------+------------+------------+
|        ABQ|       19.86|         316|       11.42|          69|
|        ANC|        4.44|         149|        7.90|         141|
|        ATL|       11.98|         397|        7.73|         145|
|        AUS|        3.48|          50|       -0.21|          18|
|        BOS|        7.84|         110|       14.58|         152|
|        BUR|       -2.03|          56|       -1.89|          78|
|        CLE|       16.00|          27|        null|        null|
|        CLT|        2.53|          41|       12.96|         228|
|        COS|        5.32|          82|       12.18|         203|
|        CVG|       -0.50|           4|        null|        null|
|        DCA|       -1.15|          50|        0.07|          34|
|        DEN|       13.13|         425|       12.95|         625|
|        D

## EJERCICIO MYSQL 

In [12]:
employeesDF = (spark
 .read
 .format("jdbc")
 .option("url", "jdbc:mysql://localhost/employees")
 .option("driver", "com.mysql.jdbc.Driver")
 .option("dbtable", "employees")
 .option("user", "root")
 .option("password", "alromeco")
.load())

employeesDF.show()

+------+----------+----------+-----------+------+----------+
|emp_no|birth_date|first_name|  last_name|gender| hire_date|
+------+----------+----------+-----------+------+----------+
| 10001|1953-09-02|    Georgi|    Facello|     M|1986-06-26|
| 10002|1964-06-02|   Bezalel|     Simmel|     F|1985-11-21|
| 10003|1959-12-03|     Parto|    Bamford|     M|1986-08-28|
| 10004|1954-05-01| Chirstian|    Koblick|     M|1986-12-01|
| 10005|1955-01-21|   Kyoichi|   Maliniak|     M|1989-09-12|
| 10006|1953-04-20|    Anneke|    Preusig|     F|1989-06-02|
| 10007|1957-05-23|   Tzvetan|  Zielinski|     F|1989-02-10|
| 10008|1958-02-19|    Saniya|   Kalloufi|     M|1994-09-15|
| 10009|1952-04-19|    Sumant|       Peac|     F|1985-02-18|
| 10010|1963-06-01| Duangkaew|   Piveteau|     F|1989-08-24|
| 10011|1953-11-07|      Mary|      Sluis|     F|1990-01-22|
| 10012|1960-10-04|  Patricio|  Bridgland|     M|1992-12-18|
| 10013|1963-06-07| Eberhardt|     Terkki|     M|1985-10-20|
| 10014|1956-02-12|     

In [11]:
departmentsDF = (spark
 .read
 .format("jdbc")
 .option("url", "jdbc:mysql://localhost/employees")
 .option("driver", "com.mysql.jdbc.Driver")
 .option("dbtable", "departments")
 .option("user", "root")
 .option("password", "alromeco")
.load())

departmentsDF.show()

+-------+------------------+
|dept_no|         dept_name|
+-------+------------------+
|   d009|  Customer Service|
|   d005|       Development|
|   d002|           Finance|
|   d003|   Human Resources|
|   d001|         Marketing|
|   d004|        Production|
|   d006|Quality Management|
|   d008|          Research|
|   d007|             Sales|
+-------+------------------+



In [10]:
salariesDF = (spark
 .read
 .format("jdbc")
 .option("url", "jdbc:mysql://localhost/employees")
 .option("driver", "com.mysql.jdbc.Driver")
 .option("dbtable", "salaries")
 .option("user", "root")
 .option("password", "alromeco")
.load())

salariesDF.show()

+------+------+----------+----------+
|emp_no|salary| from_date|   to_date|
+------+------+----------+----------+
| 10001| 60117|1986-06-26|1987-06-26|
| 10001| 62102|1987-06-26|1988-06-25|
| 10001| 66074|1988-06-25|1989-06-25|
| 10001| 66596|1989-06-25|1990-06-25|
| 10001| 66961|1990-06-25|1991-06-25|
| 10001| 71046|1991-06-25|1992-06-24|
| 10001| 74333|1992-06-24|1993-06-24|
| 10001| 75286|1993-06-24|1994-06-24|
| 10001| 75994|1994-06-24|1995-06-24|
| 10001| 76884|1995-06-24|1996-06-23|
| 10001| 80013|1996-06-23|1997-06-23|
| 10001| 81025|1997-06-23|1998-06-23|
| 10001| 81097|1998-06-23|1999-06-23|
| 10001| 84917|1999-06-23|2000-06-22|
| 10001| 85112|2000-06-22|2001-06-22|
| 10001| 85097|2001-06-22|2002-06-22|
| 10001| 88958|2002-06-22|9999-01-01|
| 10002| 65828|1996-08-03|1997-08-03|
| 10002| 65909|1997-08-03|1998-08-03|
| 10002| 67534|1998-08-03|1999-08-03|
+------+------+----------+----------+
only showing top 20 rows



In [13]:
titlesDF = (spark
 .read
 .format("jdbc")
 .option("url", "jdbc:mysql://localhost/employees")
 .option("driver", "com.mysql.jdbc.Driver")
 .option("dbtable", "titles")
 .option("user", "root")
 .option("password", "alromeco")
.load())

titlesDF.show()

+------+------------------+----------+----------+
|emp_no|             title| from_date|   to_date|
+------+------------------+----------+----------+
| 10001|   Senior Engineer|1986-06-26|9999-01-01|
| 10002|             Staff|1996-08-03|9999-01-01|
| 10003|   Senior Engineer|1995-12-03|9999-01-01|
| 10004|          Engineer|1986-12-01|1995-12-01|
| 10004|   Senior Engineer|1995-12-01|9999-01-01|
| 10005|      Senior Staff|1996-09-12|9999-01-01|
| 10005|             Staff|1989-09-12|1996-09-12|
| 10006|   Senior Engineer|1990-08-05|9999-01-01|
| 10007|      Senior Staff|1996-02-11|9999-01-01|
| 10007|             Staff|1989-02-10|1996-02-11|
| 10008|Assistant Engineer|1998-03-11|2000-07-31|
| 10009|Assistant Engineer|1985-02-18|1990-02-18|
| 10009|          Engineer|1990-02-18|1995-02-18|
| 10009|   Senior Engineer|1995-02-18|9999-01-01|
| 10010|          Engineer|1996-11-24|9999-01-01|
| 10011|             Staff|1990-01-22|1996-11-09|
| 10012|          Engineer|1992-12-18|2000-12-18|


In [38]:
empTitleSal = employeesDF.join(salariesDF,
                employeesDF.emp_no == salariesDF.emp_no
                ).join(titlesDF,
                      employeesDF.emp_no == titlesDF.emp_no).select(employeesDF.emp_no, "birth_date", "first_name", "last_name", "gender", "hire_date", "title","salary")

In [39]:
from pyspark.sql.functions import *

(empTitleSal
.groupBy(employeesDF.emp_no, "birth_date", "first_name", "last_name", "gender", "hire_date", "title")
.agg(round(avg("salary"),2).alias("avgSalary"))).show()

+------+----------+----------+---------+------+----------+----------------+---------+
|emp_no|birth_date|first_name|last_name|gender| hire_date|           title|avgSalary|
+------+----------+----------+---------+------+----------+----------------+---------+
| 10206|1960-09-19|  Alassane|  Iwayama|     F|1988-04-19|Technique Leader| 55591.73|
| 10362|1963-09-16|   Shalesh|  dAstous|     M|1988-08-24|    Senior Staff|  47990.0|
| 10623|1953-07-11|Aleksander|   Danlos|     F|1987-03-07|        Engineer| 71811.64|
| 10623|1953-07-11|Aleksander|   Danlos|     F|1987-03-07| Senior Engineer| 71811.64|
| 10817|1958-10-02|       Uri|  Rullman|     F|1990-12-26|    Senior Staff| 65324.67|
| 10817|1958-10-02|       Uri|  Rullman|     F|1990-12-26|           Staff| 65324.67|
| 11033|1957-03-01|   Shushma|     Bahk|     F|1990-10-02|        Engineer| 66597.75|
| 11033|1957-03-01|   Shushma|     Bahk|     F|1990-10-02| Senior Engineer| 66597.75|
| 11141|1957-08-20|   Vasiliy|Kermarrec|     F|1989-12

# DIFERENCIA ENTRE RANK Y DENSE_RANK

La diferencia entre rank y dense_rank es que si hay dos campos que tienen la misma posición, utilizando rank el siguiente campo se saltará un número, mientras que usando dense_rank no se saltará ningún número.
Por ejemplo, si hay dos campos que tienen el rango 2, con rank el siguiente campo tendrá el número 4 mientras que con dense_rank tendrá el 3.

In [32]:
currentDept = (spark
 .read
 .format("jdbc")
 .option("url", "jdbc:mysql://localhost/employees")
 .option("driver", "com.mysql.jdbc.Driver")
 .option("dbtable", "current_dept_emp")
 .option("user", "root")
 .option("password", "alromeco")
.load())

currentDept.show()

+------+-------+----------+----------+
|emp_no|dept_no| from_date|   to_date|
+------+-------+----------+----------+
| 10001|   d005|1986-06-26|9999-01-01|
| 10002|   d007|1996-08-03|9999-01-01|
| 10003|   d004|1995-12-03|9999-01-01|
| 10004|   d004|1986-12-01|9999-01-01|
| 10005|   d003|1989-09-12|9999-01-01|
| 10006|   d005|1990-08-05|9999-01-01|
| 10007|   d008|1989-02-10|9999-01-01|
| 10008|   d005|1998-03-11|2000-07-31|
| 10009|   d006|1985-02-18|9999-01-01|
| 10010|   d006|2000-06-26|9999-01-01|
| 10011|   d009|1990-01-22|1996-11-09|
| 10012|   d005|1992-12-18|9999-01-01|
| 10013|   d003|1985-10-20|9999-01-01|
| 10014|   d005|1993-12-29|9999-01-01|
| 10015|   d008|1992-09-19|1993-08-22|
| 10016|   d007|1998-02-11|9999-01-01|
| 10017|   d001|1993-08-03|9999-01-01|
| 10018|   d004|1992-07-29|9999-01-01|
| 10019|   d008|1999-04-30|9999-01-01|
| 10020|   d004|1997-12-30|9999-01-01|
+------+-------+----------+----------+
only showing top 20 rows



In [45]:
empdDept=empTitleSal.join(currentDept,
                 empTitleSal.emp_no == currentDept.emp_no
                 ).join(departmentsDF,
                       currentDept.dept_no == departmentsDF.dept_no
                       ).select(empTitleSal.emp_no, "birth_date", "first_name", "last_name", "gender", "hire_date", "title","salary",currentDept.dept_no,"dept_name","from_date","to_date")

empdDept.persist()

empdDept.show()

+------+----------+----------+---------+------+----------+----------------+------+-------+-----------+----------+----------+
|emp_no|birth_date|first_name|last_name|gender| hire_date|           title|salary|dept_no|  dept_name| from_date|   to_date|
+------+----------+----------+---------+------+----------+----------------+------+-------+-----------+----------+----------+
| 10206|1960-09-19|  Alassane|  Iwayama|     F|1988-04-19|Technique Leader| 40000|   d005|Development|1988-04-19|9999-01-01|
| 10206|1960-09-19|  Alassane|  Iwayama|     F|1988-04-19|Technique Leader| 43519|   d005|Development|1988-04-19|9999-01-01|
| 10206|1960-09-19|  Alassane|  Iwayama|     F|1988-04-19|Technique Leader| 46265|   d005|Development|1988-04-19|9999-01-01|
| 10206|1960-09-19|  Alassane|  Iwayama|     F|1988-04-19|Technique Leader| 46865|   d005|Development|1988-04-19|9999-01-01|
| 10206|1960-09-19|  Alassane|  Iwayama|     F|1988-04-19|Technique Leader| 47837|   d005|Development|1988-04-19|9999-01-01|


In [None]:
empdDept.select("emp_no", "first_name", "last_name", "gender","dept_name")